Commit f493dd64, authored by YiPeng Chai and committed by Alex Deucher

drm/amdgpu: prepare for logging ecc errors

Prepare for logging ecc errors.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 98b5bc87
...@@ -2737,6 +2737,35 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, ...@@ -2737,6 +2737,35 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
} }
#endif #endif
/**
 * amdgpu_ras_ecc_log_init - put an ECC error log into its initial state
 * @ecc_log: log descriptor to initialize
 *
 * Clears the "updated" flag, sets up the radix tree root, fills the siphash
 * key with a fixed byte pattern and initializes the mutex. The four steps
 * touch distinct members and do not depend on one another.
 */
static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
{
	/* Nothing has been recorded yet. */
	ecc_log->de_updated = false;

	/* GFP_KERNEL is handed to the tree as its allocation mask. */
	INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);

	/* Any value is acceptable as the siphash key; use a constant fill. */
	memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key));

	mutex_init(&ecc_log->lock);
}
static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
{
struct radix_tree_iter iter;
void __rcu **slot;
struct ras_ecc_err *ecc_err;
mutex_lock(&ecc_log->lock);
radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
ecc_err = radix_tree_deref_slot(slot);
kfree(ecc_err->err_pages.pfn);
kfree(ecc_err);
radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
}
mutex_unlock(&ecc_log->lock);
mutex_destroy(&ecc_log->lock);
ecc_log->de_updated = false;
}
static int amdgpu_ras_page_retirement_thread(void *param) static int amdgpu_ras_page_retirement_thread(void *param)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)param; struct amdgpu_device *adev = (struct amdgpu_device *)param;
...@@ -2838,6 +2867,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) ...@@ -2838,6 +2867,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
} }
amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
#ifdef CONFIG_X86_MCE_AMD #ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) && if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu)) (adev->gmc.xgmi.connected_to_cpu))
...@@ -2882,6 +2912,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) ...@@ -2882,6 +2912,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
cancel_work_sync(&con->recovery_work); cancel_work_sync(&con->recovery_work);
amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
mutex_lock(&con->recovery_lock); mutex_lock(&con->recovery_lock);
con->eh_data = NULL; con->eh_data = NULL;
kfree(data->bps); kfree(data->bps);
......
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/kfifo.h> #include <linux/kfifo.h>
#include <linux/radix-tree.h>
#include <linux/siphash.h>
#include "ta_ras_if.h" #include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h" #include "amdgpu_ras_eeprom.h"
#include "amdgpu_smuio.h" #include "amdgpu_smuio.h"
...@@ -454,6 +456,26 @@ struct ras_poison_msg { ...@@ -454,6 +456,26 @@ struct ras_poison_msg {
void *data; void *data;
}; };
/*
 * A counted array of 64-bit values embedded in struct ras_ecc_err.
 * NOTE(review): the member name suggests these are page frame numbers of
 * the pages affected by an error -- confirm against the code that fills it.
 */
struct ras_err_pages {
/* presumably the number of valid entries in pfn -- TODO confirm */
uint32_t count;
/* separately allocated array; released with kfree() in amdgpu_ras_ecc_log_fini() */
uint64_t *pfn;
};
/*
 * One record kept in the de_page_tree radix tree of struct ras_ecc_log_info;
 * amdgpu_ras_ecc_log_fini() dereferences each tree slot as a pointer to this
 * structure and frees it with kfree().
 */
struct ras_ecc_err {
/* NOTE(review): presumably the radix tree index, derived with the siphash key -- confirm */
u64 hash_index;
/* NOTE(review): status, ipid and addr look like raw error-report values -- confirm with the producer */
uint64_t status;
uint64_t ipid;
uint64_t addr;
/* page list attached to this record; its pfn array is freed together with the record */
struct ras_err_pages err_pages;
};
/*
 * State of one ECC error log. Set up by amdgpu_ras_ecc_log_init() and torn
 * down by amdgpu_ras_ecc_log_fini().
 */
struct ras_ecc_log_info {
/* held by amdgpu_ras_ecc_log_fini() while the tree is being emptied */
struct mutex lock;
/* siphash key; initialized to a constant 0xad byte fill */
siphash_key_t ecc_key;
/* radix tree whose slots hold struct ras_ecc_err pointers */
struct radix_tree_root de_page_tree;
/* reset to false by both init and fini; NOTE(review): presumably set when new entries are logged -- confirm */
bool de_updated;
};
struct amdgpu_ras { struct amdgpu_ras {
/* ras infrastructure */ /* ras infrastructure */
/* for ras itself. */ /* for ras itself. */
...@@ -514,6 +536,7 @@ struct amdgpu_ras { ...@@ -514,6 +536,7 @@ struct amdgpu_ras {
atomic_t page_retirement_req_cnt; atomic_t page_retirement_req_cnt;
struct mutex page_rsv_lock; struct mutex page_rsv_lock;
DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
struct ras_ecc_log_info umc_ecc_log;
/* Fatal error detected flag */ /* Fatal error detected flag */
atomic_t fed; atomic_t fed;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment