Commit e3c1b071 authored by shaoyunl's avatar shaoyunl Committed by Alex Deucher

drm/amdgpu: Reset the devices in the XGMI hive during probe

In a passthrough configuration, the hypervisor will trigger an SBR (Secondary Bus Reset) to the devices
without synchronizing them with each other. This could cause a device hang, since in an XGMI configuration all the devices
within the hive need to be reset within a limited time slot. This series of patches tries to solve this issue
by cooperating with the new SMU, which will only do minimal housekeeping to respond to the SBR request but won't
do the real reset job, leaving it to the driver. The driver needs to do the whole SW init and minimal HW init
to bring up the SMU and trigger the reset (possibly BACO) on all the ASICs at the same time.
Signed-off-by: default avatarshaoyunl <shaoyun.liu@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 655ce9cb
...@@ -124,6 +124,10 @@ struct amdgpu_mgpu_info ...@@ -124,6 +124,10 @@ struct amdgpu_mgpu_info
uint32_t num_gpu; uint32_t num_gpu;
uint32_t num_dgpu; uint32_t num_dgpu;
uint32_t num_apu; uint32_t num_apu;
/* delayed reset_func for XGMI configuration if necessary */
struct delayed_work delayed_reset_work;
bool pending_reset;
}; };
struct amdgpu_watchdog_timer struct amdgpu_watchdog_timer
...@@ -1128,6 +1132,15 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, ...@@ -1128,6 +1132,15 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job,
bool *need_full_reset_arg);
int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
struct list_head *device_list_handle,
bool *need_full_reset_arg,
bool skip_hw_reset);
int emu_soc_asic_init(struct amdgpu_device *adev); int emu_soc_asic_init(struct amdgpu_device *adev);
/* /*
......
...@@ -1224,6 +1224,10 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) ...@@ -1224,6 +1224,10 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
} }
} }
/* Don't post if we need to reset whole hive on init */
if (adev->gmc.xgmi.pending_reset)
return false;
if (adev->has_hw_reset) { if (adev->has_hw_reset) {
adev->has_hw_reset = false; adev->has_hw_reset = false;
return true; return true;
...@@ -2154,6 +2158,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) ...@@ -2154,6 +2158,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
continue; continue;
if (!adev->ip_blocks[i].status.sw)
continue;
/* no need to do the fw loading again if already done*/ /* no need to do the fw loading again if already done*/
if (adev->ip_blocks[i].status.hw == true) if (adev->ip_blocks[i].status.hw == true)
break; break;
...@@ -2294,7 +2301,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) ...@@ -2294,7 +2301,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (adev->gmc.xgmi.num_physical_nodes > 1) if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev); amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
amdgpu_fru_get_product_info(adev); amdgpu_fru_get_product_info(adev);
...@@ -2739,6 +2749,16 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) ...@@ -2739,6 +2749,16 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false; adev->ip_blocks[i].status.hw = false;
continue; continue;
} }
/* skip unnecessary suspend if we do not initialize them yet */
if (adev->gmc.xgmi.pending_reset &&
!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
adev->ip_blocks[i].status.hw = false;
continue;
}
/* XXX handle errors */ /* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev); r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */ /* XXX handle errors */
...@@ -3414,10 +3434,28 @@ int amdgpu_device_init(struct amdgpu_device *adev, ...@@ -3414,10 +3434,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* E.g., driver was not cleanly unloaded previously, etc. * E.g., driver was not cleanly unloaded previously, etc.
*/ */
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
r = amdgpu_asic_reset(adev); if (adev->gmc.xgmi.num_physical_nodes) {
if (r) { dev_info(adev->dev, "Pending hive reset.\n");
dev_err(adev->dev, "asic reset on init failed\n"); adev->gmc.xgmi.pending_reset = true;
goto failed; /* Only need to init necessary block for SMU to handle the reset */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
DRM_DEBUG("IP %s disabed for hw_init.\n",
adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.hw = true;
}
}
} else {
r = amdgpu_asic_reset(adev);
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
}
} }
} }
...@@ -3548,19 +3586,19 @@ int amdgpu_device_init(struct amdgpu_device *adev, ...@@ -3548,19 +3586,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* enable clockgating, etc. after ib tests, etc. since some blocks require /* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically. * explicit gating rather than handling it automatically.
*/ */
r = amdgpu_device_ip_late_init(adev); if (!adev->gmc.xgmi.pending_reset) {
if (r) { r = amdgpu_device_ip_late_init(adev);
dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); if (r) {
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
goto failed; amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
goto failed;
}
/* must succeed. */
amdgpu_ras_resume(adev);
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
} }
/* must succeed. */
amdgpu_ras_resume(adev);
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
if (amdgpu_sriov_vf(adev)) if (amdgpu_sriov_vf(adev))
flush_delayed_work(&adev->delayed_init_work); flush_delayed_work(&adev->delayed_init_work);
...@@ -3577,6 +3615,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, ...@@ -3577,6 +3615,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_device_cache_pci_state(adev->pdev)) if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(pdev); pci_restore_state(pdev);
if (adev->gmc.xgmi.pending_reset)
queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
return 0; return 0;
failed: failed:
...@@ -4287,14 +4329,16 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) ...@@ -4287,14 +4329,16 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
return ret; return ret;
} }
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job, struct amdgpu_job *job,
bool *need_full_reset_arg) bool *need_full_reset_arg)
{ {
int i, r = 0; int i, r = 0;
bool need_full_reset = *need_full_reset_arg; bool need_full_reset = *need_full_reset_arg;
amdgpu_debugfs_wait_dump(adev); /* no need to dump if device is not in good state during probe period */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_debugfs_wait_dump(adev);
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
/* stop the data exchange thread */ /* stop the data exchange thread */
...@@ -4340,10 +4384,10 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, ...@@ -4340,10 +4384,10 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r; return r;
} }
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
struct list_head *device_list_handle, struct list_head *device_list_handle,
bool *need_full_reset_arg, bool *need_full_reset_arg,
bool skip_hw_reset) bool skip_hw_reset)
{ {
struct amdgpu_device *tmp_adev = NULL; struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset = *need_full_reset_arg, vram_lost = false; bool need_full_reset = *need_full_reset_arg, vram_lost = false;
...@@ -4357,6 +4401,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, ...@@ -4357,6 +4401,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, reset_list) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */ /* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
tmp_adev->gmc.xgmi.pending_reset = false;
if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY; r = -EALREADY;
} else } else
...@@ -4395,10 +4440,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, ...@@ -4395,10 +4440,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, reset_list) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
if (need_full_reset) { if (need_full_reset) {
/* post card */ /* post card */
if (amdgpu_device_asic_init(tmp_adev)) r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!"); dev_warn(tmp_adev->dev, "asic atom init failed!");
} else {
if (!r) {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_device_ip_resume_phase1(tmp_adev); r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r) if (r)
...@@ -4431,6 +4476,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, ...@@ -4431,6 +4476,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
*/ */
amdgpu_register_gpu_instance(tmp_adev); amdgpu_register_gpu_instance(tmp_adev);
if (!hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(tmp_adev);
r = amdgpu_device_ip_late_init(tmp_adev); r = amdgpu_device_ip_late_init(tmp_adev);
if (r) if (r)
goto out; goto out;
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
/* /*
* KMS wrapper. * KMS wrapper.
...@@ -168,8 +169,13 @@ int amdgpu_tmz = -1; /* auto */ ...@@ -168,8 +169,13 @@ int amdgpu_tmz = -1; /* auto */
int amdgpu_reset_method = -1; /* auto */ int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1; int amdgpu_num_kcq = -1;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
struct amdgpu_mgpu_info mgpu_info = { struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
.delayed_reset_work = __DELAYED_WORK_INITIALIZER(
mgpu_info.delayed_reset_work,
amdgpu_drv_delayed_reset_work_handler, 0),
}; };
int amdgpu_ras_enable = -1; int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff; uint amdgpu_ras_mask = 0xffffffff;
...@@ -1321,6 +1327,69 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) ...@@ -1321,6 +1327,69 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
adev->mp1_state = PP_MP1_STATE_NONE; adev->mp1_state = PP_MP1_STATE_NONE;
} }
/**
 * amdgpu_drv_delayed_reset_work_handler - work handler for reset
 *
 * @work: work_struct (not used by the handler).
 *
 * Walks the first mgpu_info.num_dgpu entries of mgpu_info.gpu_ins, runs the
 * pre-reset step and the per-device xgmi_reset_work on each of them, then
 * tears down the XGMI hive info and GPU instance registration and calls
 * amdgpu_do_asic_reset() to bring the devices back up.
 *
 * Only the first invocation does any work: mgpu_info.pending_reset is set
 * under mgpu_info.mutex and every later invocation returns immediately.
 * The flag is not cleared by this function.
 */
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
{
	struct list_head device_list;
	struct amdgpu_device *adev;
	int i, r;
	bool need_full_reset = true;

	/* Let exactly one caller through; the work is queued once per device. */
	mutex_lock(&mgpu_info.mutex);
	if (mgpu_info.pending_reset) {
		mutex_unlock(&mgpu_info.mutex);
		return;
	}
	mgpu_info.pending_reset = true;
	mutex_unlock(&mgpu_info.mutex);

	/* Prepare each device and kick off its reset work without waiting. */
	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		adev = mgpu_info.gpu_ins[i].adev;
		r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
		if (r) {
			dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				r, adev_to_drm(adev)->unique);
		}

		/*
		 * queue_work() returns false when the work item is already
		 * queued. Report that instead of silently dropping it.
		 */
		if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
			dev_err(adev->dev, "XGMI reset work was already queued for drm dev, %s\n",
				adev_to_drm(adev)->unique);
	}

	/* Clear the per-device pending flag and wait for every reset work. */
	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		adev = mgpu_info.gpu_ins[i].adev;
		adev->gmc.xgmi.pending_reset = false;
		flush_work(&adev->xgmi_reset_work);
	}

	/* reset function will rebuild the xgmi hive info, clear it now */
	for (i = 0; i < mgpu_info.num_dgpu; i++)
		amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);

	/*
	 * Build the device list from mgpu_info before unregistering anything;
	 * the unregister loop below iterates this list rather than mgpu_info.
	 */
	INIT_LIST_HEAD(&device_list);
	for (i = 0; i < mgpu_info.num_dgpu; i++)
		list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);

	/* unregister the GPU first, reset function will add them back */
	list_for_each_entry(adev, &device_list, reset_list)
		amdgpu_unregister_gpu_instance(adev);

	/*
	 * No hive is passed and skip_hw_reset is true.
	 * NOTE(review): presumably because the hardware reset was already
	 * performed by the xgmi_reset_work flushed above - confirm.
	 */
	r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
	if (r) {
		DRM_ERROR("reinit gpus failure");
		return;
	}

	/* Finish the init steps that were not completed for these devices. */
	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		adev = mgpu_info.gpu_ins[i].adev;
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);
		amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}
}
static int amdgpu_pmops_suspend(struct device *dev) static int amdgpu_pmops_suspend(struct device *dev)
{ {
struct drm_device *drm_dev = dev_get_drvdata(dev); struct drm_device *drm_dev = dev_get_drvdata(dev);
......
...@@ -150,6 +150,7 @@ struct amdgpu_xgmi { ...@@ -150,6 +150,7 @@ struct amdgpu_xgmi {
bool supported; bool supported;
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
bool connected_to_cpu; bool connected_to_cpu;
bool pending_reset;
}; };
struct amdgpu_gmc { struct amdgpu_gmc {
......
...@@ -441,7 +441,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) ...@@ -441,7 +441,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
if (!__is_ras_eeprom_supported(adev)) if (!__is_ras_eeprom_supported(adev))
return false; return false;
if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) { if (con && (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD)) {
dev_warn(adev->dev, "This GPU is in BAD status."); dev_warn(adev->dev, "This GPU is in BAD status.");
dev_warn(adev->dev, "Please retire it or setting one bigger " dev_warn(adev->dev, "Please retire it or setting one bigger "
"threshold value when reloading driver.\n"); "threshold value when reloading driver.\n");
......
...@@ -499,7 +499,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) ...@@ -499,7 +499,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported) if (!adev->gmc.xgmi.supported)
return 0; return 0;
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { if (!adev->gmc.xgmi.pending_reset &&
amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
ret = psp_xgmi_initialize(&adev->psp); ret = psp_xgmi_initialize(&adev->psp);
if (ret) { if (ret) {
dev_err(adev->dev, dev_err(adev->dev,
...@@ -545,7 +546,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) ...@@ -545,7 +546,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
task_barrier_add_task(&hive->tb); task_barrier_add_task(&hive->tb);
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { if (!adev->gmc.xgmi.pending_reset &&
amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */ /* update node list for other device in the hive */
if (tmp_adev != adev) { if (tmp_adev != adev) {
...@@ -574,7 +576,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) ...@@ -574,7 +576,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
} }
} }
if (!ret) if (!ret && !adev->gmc.xgmi.pending_reset)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
exit_unlock: exit_unlock:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment