Commit c8365dbd, authored by Christian König, committed by Alex Deucher

drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8"

This reverts commit 728e7e0c.

Further discussion reveals that this feature is severely broken
and needs to be reverted ASAP.

GPU reset can never be delayed by userspace, even for debugging;
otherwise we can run into in-kernel deadlocks.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 286826d7
...@@ -1085,8 +1085,6 @@ struct amdgpu_device { ...@@ -1085,8 +1085,6 @@ struct amdgpu_device {
char product_name[32]; char product_name[32];
char serial[20]; char serial[20];
struct amdgpu_autodump autodump;
atomic_t throttling_logging_enabled; atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs; struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled; uint32_t ras_hw_enabled;
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/poll.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_pm.h" #include "amdgpu_pm.h"
...@@ -38,85 +37,7 @@ ...@@ -38,85 +37,7 @@
#include "amdgpu_fw_attestation.h" #include "amdgpu_fw_attestation.h"
#include "amdgpu_umr.h" #include "amdgpu_umr.h"
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
unsigned long timeout = 600 * HZ;
int ret;
wake_up_interruptible(&adev->autodump.gpu_hang);
ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
if (ret == 0) {
pr_err("autodump: timeout, move on to gpu recovery\n");
return -ETIMEDOUT;
}
#endif
return 0;
}
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = inode->i_private;
int ret;
file->private_data = adev;
ret = down_read_killable(&adev->reset_sem);
if (ret)
return ret;
if (adev->autodump.dumping.done) {
reinit_completion(&adev->autodump.dumping);
ret = 0;
} else {
ret = -EBUSY;
}
up_read(&adev->reset_sem);
return ret;
}
static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = file->private_data;
complete_all(&adev->autodump.dumping);
return 0;
}
static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
{
struct amdgpu_device *adev = file->private_data;
poll_wait(file, &adev->autodump.gpu_hang, poll_table);
if (amdgpu_in_reset(adev))
return POLLIN | POLLRDNORM | POLLWRNORM;
return 0;
}
static const struct file_operations autodump_debug_fops = {
.owner = THIS_MODULE,
.open = amdgpu_debugfs_autodump_open,
.poll = amdgpu_debugfs_autodump_poll,
.release = amdgpu_debugfs_autodump_release,
};
static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
{
init_completion(&adev->autodump.dumping);
complete_all(&adev->autodump.dumping);
init_waitqueue_head(&adev->autodump.gpu_hang);
debugfs_create_file("amdgpu_autodump", 0600,
adev_to_drm(adev)->primary->debugfs_root,
adev, &autodump_debug_fops);
}
/** /**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
...@@ -1738,7 +1659,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) ...@@ -1738,7 +1659,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
} }
amdgpu_ras_debugfs_create_all(adev); amdgpu_ras_debugfs_create_all(adev);
amdgpu_debugfs_autodump_init(adev);
amdgpu_rap_debugfs_init(adev); amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev);
......
...@@ -25,10 +25,6 @@ ...@@ -25,10 +25,6 @@
/* /*
* Debugfs * Debugfs
*/ */
struct amdgpu_autodump {
struct completion dumping;
struct wait_queue_head gpu_hang;
};
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev);
...@@ -36,4 +32,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); ...@@ -36,4 +32,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
...@@ -4440,10 +4440,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, ...@@ -4440,10 +4440,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (reset_context->reset_req_dev == adev) if (reset_context->reset_req_dev == adev)
job = reset_context->job; job = reset_context->job;
/* no need to dump if device is not in good state during probe period */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_debugfs_wait_dump(adev);
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
/* stop the data exchange thread */ /* stop the data exchange thread */
amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_fini_data_exchange(adev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment