Commit acc0204c authored by Guchun Chen, committed by Alex Deucher

drm/amdgpu: add bad page count threshold in module parameter(v3)

bad_page_threshold can be configured to enable or disable the
bad page retirement feature in RAS.

When it's -1, RAS uses a typical bad page failure value to
handle bad page retirement.

When it's 0, bad page retirement is disabled, and no bad pages
will be recorded or saved.

For any other valid value, the driver uses the given value
as the threshold for the total number of bad pages.

v2: correct documentation of this parameter.

v3: remove confusing statement in documentation.
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 522ec6e0
...@@ -178,6 +178,7 @@ extern uint amdgpu_dm_abm_level; ...@@ -178,6 +178,7 @@ extern uint amdgpu_dm_abm_level;
extern struct amdgpu_mgpu_info mgpu_info; extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable; extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask; extern uint amdgpu_ras_mask;
extern int amdgpu_bad_page_threshold;
extern int amdgpu_async_gfx_ring; extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp; extern int amdgpu_mcbp;
extern int amdgpu_discovery; extern int amdgpu_discovery;
......
...@@ -156,6 +156,7 @@ struct amdgpu_mgpu_info mgpu_info = { ...@@ -156,6 +156,7 @@ struct amdgpu_mgpu_info mgpu_info = {
}; };
int amdgpu_ras_enable = -1; int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff; uint amdgpu_ras_mask = 0xffffffff;
int amdgpu_bad_page_threshold = -1;
/** /**
* DOC: vramlimit (int) * DOC: vramlimit (int)
...@@ -765,6 +766,16 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); ...@@ -765,6 +766,16 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0444); module_param_named(reset_method, amdgpu_reset_method, int, 0444);
/**
 * DOC: bad_page_threshold (int)
 * Threshold for the total number of faulty pages detected by RAS ECC.
 * If the number of faulty pages exceeds this threshold, the GPU may enter a
 * bad status and is left in that state for the user to check further.
 * The default value of -1 selects the threshold automatically, 0 disables
 * bad page retirement, and any other valid value is used as the threshold.
 */
MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
static const struct pci_device_id pciidlist[] = { static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI #ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment