Commit 0c49e0b8, authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: check if vram is lost v2

Back up the first 64 bytes of the GART table as a reset magic, and check
whether the magic is still the same after a GPU hardware reset.
v2: use memcmp instead of a hand-written comparison.
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e0ec4506
...@@ -1427,6 +1427,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); ...@@ -1427,6 +1427,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
/* Size in bytes of the reset magic: the leading bytes of the GART table
 * that are saved in amdgpu_device.reset_magic and compared after a reset.
 */
#define AMDGPU_RESET_MAGIC_NUM 64
struct amdgpu_device { struct amdgpu_device {
struct device *dev; struct device *dev;
struct drm_device *ddev; struct drm_device *ddev;
...@@ -1619,6 +1620,7 @@ struct amdgpu_device { ...@@ -1619,6 +1620,7 @@ struct amdgpu_device {
/* record hw reset is performed */ /* record hw reset is performed */
bool has_hw_reset; bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
}; };
......
...@@ -1658,6 +1658,17 @@ static int amdgpu_init(struct amdgpu_device *adev) ...@@ -1658,6 +1658,17 @@ static int amdgpu_init(struct amdgpu_device *adev)
return 0; return 0;
} }
/**
 * amdgpu_fill_reset_magic - snapshot the start of the GART table
 *
 * @adev: amdgpu device whose reset magic is (re)recorded
 *
 * Copies the first AMDGPU_RESET_MAGIC_NUM bytes found at adev->gart.ptr
 * into adev->reset_magic so that they can later be compared against the
 * GART table contents.
 */
static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
{
	const void *gart_head = adev->gart.ptr;

	memcpy(adev->reset_magic, gart_head, AMDGPU_RESET_MAGIC_NUM);
}
/**
 * amdgpu_check_vram_lost - compare the GART table start with the saved magic
 *
 * @adev: amdgpu device to check
 *
 * Compares the first AMDGPU_RESET_MAGIC_NUM bytes at adev->gart.ptr with
 * the copy stored in adev->reset_magic.
 *
 * Returns true when the two regions differ (VRAM content is treated as
 * lost), false when they are byte-for-byte identical.
 */
static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
{
	int diff = memcmp(adev->gart.ptr, adev->reset_magic,
			  AMDGPU_RESET_MAGIC_NUM);

	return diff != 0;
}
static int amdgpu_late_init(struct amdgpu_device *adev) static int amdgpu_late_init(struct amdgpu_device *adev)
{ {
int i = 0, r; int i = 0, r;
...@@ -1688,6 +1699,8 @@ static int amdgpu_late_init(struct amdgpu_device *adev) ...@@ -1688,6 +1699,8 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
} }
} }
amdgpu_fill_reset_magic(adev);
return 0; return 0;
} }
...@@ -2762,7 +2775,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) ...@@ -2762,7 +2775,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
{ {
int i, r; int i, r;
int resched; int resched;
bool need_full_reset; bool need_full_reset, vram_lost = false;
if (!amdgpu_check_soft_reset(adev)) { if (!amdgpu_check_soft_reset(adev)) {
DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
...@@ -2825,12 +2838,17 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) ...@@ -2825,12 +2838,17 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
r = amdgpu_resume_phase1(adev); r = amdgpu_resume_phase1(adev);
if (r) if (r)
goto out; goto out;
vram_lost = amdgpu_check_vram_lost(adev);
if (vram_lost)
DRM_ERROR("VRAM is lost!\n");
r = amdgpu_ttm_recover_gart(adev); r = amdgpu_ttm_recover_gart(adev);
if (r) if (r)
goto out; goto out;
r = amdgpu_resume_phase2(adev); r = amdgpu_resume_phase2(adev);
if (r) if (r)
goto out; goto out;
if (vram_lost)
amdgpu_fill_reset_magic(adev);
} }
} }
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment