Commit afb617f3 authored by YiPeng Chai, committed by Alex Deucher

drm/amdgpu: add interface to check mca umc status

Add interface to check mca umc status.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6c23f3d1
......@@ -27,6 +27,16 @@
#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"
/*
 * amdgpu_mca_is_deferred_error - ask the UMC RAS backend whether an MCA
 * status value reports a deferred error.
 *
 * @adev: amdgpu device whose umc.ras callbacks are consulted
 * @mc_status: MCA status value to classify
 *
 * Returns the result of the backend's check_ecc_err_status() callback
 * invoked with AMDGPU_MCA_ERROR_TYPE_DE, or false when no UMC RAS backend
 * or no such callback is registered.
 */
static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
					uint64_t mc_status)
{
	/*
	 * umc.ras is a pointer and check_ecc_err_status is an optional hook;
	 * make sure both are present before calling through them.
	 */
	if (adev->umc.ras && adev->umc.ras->check_ecc_err_status)
		return adev->umc.ras->check_ecc_err_status(adev,
				AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);

	return false;
}
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
unsigned long *error_count)
......@@ -257,7 +267,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
amdgpu_ras_error_statistic_ue_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else {
if (!!(MCA_REG__STATUS__DEFERRED(entry->regs[MCA_REG_IDX_STATUS])))
if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
amdgpu_ras_error_statistic_de_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else
......
......@@ -65,6 +65,7 @@ enum amdgpu_mca_ip {
/* Error classes that an MCA status value can be checked against. */
enum amdgpu_mca_error_type {
	AMDGPU_MCA_ERROR_TYPE_UE = 0,	/* uncorrectable error */
	AMDGPU_MCA_ERROR_TYPE_CE = 1,	/* correctable error */
	AMDGPU_MCA_ERROR_TYPE_DE = 2,	/* deferred error */
};
struct amdgpu_mca_ras_block {
......
......@@ -21,7 +21,7 @@
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
#include "amdgpu_ras.h"
#include "amdgpu_mca.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
* is the index of 4KB block
......@@ -64,6 +64,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
bool (*check_ecc_err_status)(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status);
/* support different eeprom table version for different asic */
void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
};
......
......@@ -422,6 +422,25 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
}
}
/*
 * umc_v12_0_check_ecc_err_status - test a UMC MCA status value against one
 * error class.
 *
 * @adev: amdgpu device, forwarded to the per-class helpers
 * @type: error class to test for (UE, CE or DE)
 * @ras_error_status: pointer to the uint64_t status value to inspect
 *
 * Dispatches to umc_v12_0_is_uncorrectable_error(),
 * umc_v12_0_is_correctable_error() or umc_v12_0_is_deferred_error()
 * according to @type and returns that helper's verdict. Returns false for
 * an unrecognised @type or a NULL @ras_error_status.
 */
static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
			enum amdgpu_mca_error_type type, void *ras_error_status)
{
	uint64_t mc_umc_status;

	/* The status arrives through an untyped pointer; don't trust it blindly. */
	if (!ras_error_status)
		return false;

	mc_umc_status = *(uint64_t *)ras_error_status;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
		return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
	case AMDGPU_MCA_ERROR_TYPE_CE:
		return umc_v12_0_is_correctable_error(adev, mc_umc_status);
	case AMDGPU_MCA_ERROR_TYPE_DE:
		return umc_v12_0_is_deferred_error(adev, mc_umc_status);
	default:
		break;
	}

	/* Unknown error class: nothing to match against. */
	return false;
}
static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
{
amdgpu_umc_loop_channels(adev,
......@@ -507,5 +526,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
};
......@@ -2557,9 +2557,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
return 0;
}
if ((type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0)) ||
(type == AMDGPU_MCA_ERROR_TYPE_CE && (umc_v12_0_is_correctable_error(adev, status0) ||
umc_v12_0_is_deferred_error(adev, status0))))
if (umc_v12_0_is_deferred_error(adev, status0) ||
umc_v12_0_is_uncorrectable_error(adev, status0) ||
umc_v12_0_is_correctable_error(adev, status0))
*count = 1;
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment