Commit fdc94d3a authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: Rework pcie_bif ras sw_init

The pcie_bif ras block needs to be initialized as early
as possible to handle fatal errors detected in the hw_init
phase. Also align the pcie_bif ras sw_init with the other
ras blocks.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent da9d669e
...@@ -22,6 +22,29 @@ ...@@ -22,6 +22,29 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
/*
 * amdgpu_nbio_ras_sw_init - software init of the pcie_bif RAS block
 * @adev: amdgpu device
 *
 * When adev->nbio.ras is not set there is nothing to do and 0 is returned.
 * Otherwise the block is registered through amdgpu_ras_register_ras_block();
 * on failure an error is logged and the registration error is returned.
 * After a successful registration the common RAS descriptor (name, block id,
 * error type) is filled in and published via adev->nbio.ras_if.
 *
 * Returns 0 on success or when no nbio RAS is present, otherwise the error
 * code reported by amdgpu_ras_register_ras_block().
 */
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_nbio_ras *nbio_ras = adev->nbio.ras;
	int ret;

	/* No nbio RAS attached to this device: nothing to initialize. */
	if (!nbio_ras)
		return 0;

	ret = amdgpu_ras_register_ras_block(adev, &nbio_ras->ras_block);
	if (ret) {
		dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
		return ret;
	}

	/* Describe the block only once registration has succeeded. */
	strcpy(nbio_ras->ras_block.ras_comm.name, "pcie_bif");
	nbio_ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
	nbio_ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;

	/* Expose the common interface through the nbio state. */
	adev->nbio.ras_if = &nbio_ras->ras_block.ras_comm;

	return 0;
}
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{ {
int r; int r;
......
...@@ -106,5 +106,6 @@ struct amdgpu_nbio { ...@@ -106,5 +106,6 @@ struct amdgpu_nbio {
struct amdgpu_nbio_ras *ras; struct amdgpu_nbio_ras *ras;
}; };
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
#endif #endif
...@@ -2554,21 +2554,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev) ...@@ -2554,21 +2554,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* initialize nbio ras function ahead of any other /* initialize nbio ras function ahead of any other
* ras functions so hardware fatal error interrupt * ras functions so hardware fatal error interrupt
* can be enabled as early as possible */ * can be enabled as early as possible */
switch (adev->asic_type) { switch (adev->ip_versions[NBIO_HWIP][0]) {
case CHIP_VEGA20: case IP_VERSION(7, 4, 0):
case CHIP_ARCTURUS: case IP_VERSION(7, 4, 1):
case CHIP_ALDEBARAN: case IP_VERSION(7, 4, 4):
if (!adev->gmc.xgmi.connected_to_cpu) { if (!adev->gmc.xgmi.connected_to_cpu)
adev->nbio.ras = &nbio_v7_4_ras; adev->nbio.ras = &nbio_v7_4_ras;
amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block);
adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm;
}
break; break;
default: default:
/* nbio ras is not available */ /* nbio ras is not available */
break; break;
} }
/* nbio ras block needs to be enabled ahead of other ras blocks
* to handle fatal error */
r = amdgpu_nbio_ras_sw_init(adev);
if (r)
return r;
if (adev->nbio.ras && if (adev->nbio.ras &&
adev->nbio.ras->init_ras_controller_interrupt) { adev->nbio.ras->init_ras_controller_interrupt) {
r = adev->nbio.ras->init_ras_controller_interrupt(adev); r = adev->nbio.ras->init_ras_controller_interrupt(adev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment