Commit cf04dfd0 authored by Tao Zhou, committed by Alex Deucher

drm/amdgpu: allow ras interrupt callback to return error data

Add error data as a parameter to the RAS interrupt callback and process it.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8c948103
...@@ -1005,7 +1005,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) ...@@ -1005,7 +1005,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data; struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry; struct amdgpu_iv_entry entry;
int ret; int ret;
struct ras_err_data err_data = {0, 0}; struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) { while (data->rptr != data->wptr) {
rmb(); rmb();
...@@ -1020,14 +1020,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) ...@@ -1020,14 +1020,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
* from the callback to udpate the error type/count, etc * from the callback to udpate the error type/count, etc
*/ */
if (data->cb) { if (data->cb) {
ret = data->cb(obj->adev, &entry); ret = data->cb(obj->adev, &err_data, &entry);
/* ue will trigger an interrupt, and in that case /* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system. * we need do a reset to recovery the whole system.
* But leave IP do that recovery, here we just dispatch * But leave IP do that recovery, here we just dispatch
* the error. * the error.
*/ */
if (ret == AMDGPU_RAS_UE) { if (ret == AMDGPU_RAS_UE) {
obj->err_data.ue_count++; obj->err_data.ue_count += err_data.ue_count;
} }
/* Might need get ce count by register, but not all IP /* Might need get ce count by register, but not all IP
* saves ce count, some IP just use one bit or two bits * saves ce count, some IP just use one bit or two bits
......
...@@ -76,9 +76,6 @@ struct ras_common_if { ...@@ -76,9 +76,6 @@ struct ras_common_if {
char name[32]; char name[32];
}; };
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
struct amdgpu_ras { struct amdgpu_ras {
/* ras infrastructure */ /* ras infrastructure */
/* for ras itself. */ /* for ras itself. */
...@@ -108,21 +105,6 @@ struct amdgpu_ras { ...@@ -108,21 +105,6 @@ struct amdgpu_ras {
uint32_t flags; uint32_t flags;
}; };
struct ras_ih_data {
/* interrupt bottom half */
struct work_struct ih_work;
int inuse;
/* IP callback */
ras_ih_cb cb;
/* full of entries */
unsigned char *ring;
unsigned int ring_size;
unsigned int element_size;
unsigned int aligned_element_size;
unsigned int rptr;
unsigned int wptr;
};
struct ras_fs_data { struct ras_fs_data {
char sysfs_name[32]; char sysfs_name[32];
char debugfs_name[32]; char debugfs_name[32];
...@@ -149,6 +131,25 @@ struct ras_err_handler_data { ...@@ -149,6 +131,25 @@ struct ras_err_handler_data {
int last_reserved; int last_reserved;
}; };
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry);
struct ras_ih_data {
/* interrupt bottom half */
struct work_struct ih_work;
int inuse;
/* IP callback */
ras_ih_cb cb;
/* full of entries */
unsigned char *ring;
unsigned int ring_size;
unsigned int element_size;
unsigned int aligned_element_size;
unsigned int rptr;
unsigned int wptr;
};
struct ras_manager { struct ras_manager {
struct ras_common_if head; struct ras_common_if head;
/* reference count */ /* reference count */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment