Commit 22106ed0 authored by Tao Zhou's avatar Tao Zhou Committed by Alex Deucher

drm/amdgpu: add bad_page_threshold check in ras_eeprom_check_err

bad_page_threshold controls page retirement behavior and it should be
also checked.

v2: simplify the condition of bad page handling path.
Signed-off-by: default avatarTao Zhou <tao.zhou1@amd.com>
Reviewed-by: default avatarStanley.Yang <Stanley.Yang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent f3cbe70e
......@@ -417,7 +417,8 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!__is_ras_eeprom_supported(adev))
if (!__is_ras_eeprom_supported(adev) ||
!amdgpu_bad_page_threshold)
return false;
/* skip check eeprom table for VEGA20 Gaming */
......@@ -428,10 +429,18 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
return false;
if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
dev_warn(adev->dev, "This GPU is in BAD status.");
dev_warn(adev->dev, "Please retire it or set a larger "
"threshold value when reloading driver.\n");
return true;
if (amdgpu_bad_page_threshold == -1) {
dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
dev_warn(adev->dev,
"But GPU can be operated due to bad_page_threshold = -1.\n");
return false;
} else {
dev_warn(adev->dev, "This GPU is in BAD status.");
dev_warn(adev->dev, "Please retire it or set a larger "
"threshold value when reloading driver.\n");
return true;
}
}
return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment