Commit 939e2258 authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: query umc error count

Check the UMC error count in both the RAS query function and
the RAS interrupt handler.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5b6b35aa
...@@ -588,11 +588,19 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, ...@@ -588,11 +588,19 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
struct ras_query_if *info) struct ras_query_if *info)
{ {
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_err_data err_data = {0, 0};
if (!obj) if (!obj)
return -EINVAL; return -EINVAL;
/* TODO might read the register to read the count */
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
if (adev->umc_funcs->query_ras_error_count)
adev->umc_funcs->query_ras_error_count(adev, &err_data);
break;
default:
break;
}
info->ue_count = obj->err_data.ue_count; info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count; info->ce_count = obj->err_data.ce_count;
...@@ -986,6 +994,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) ...@@ -986,6 +994,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data; struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry; struct amdgpu_iv_entry entry;
int ret; int ret;
struct ras_err_data err_data = {0, 0};
while (data->rptr != data->wptr) { while (data->rptr != data->wptr) {
rmb(); rmb();
......
...@@ -245,7 +245,10 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, ...@@ -245,7 +245,10 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
struct ras_err_data err_data = {0, 0};
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc_funcs->query_ras_error_count)
adev->umc_funcs->query_ras_error_count(adev, &err_data);
amdgpu_ras_reset_gpu(adev, 0); amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE; return AMDGPU_RAS_UE;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment