Commit 55977744 authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Add GPU reset SMI event

Add support for reporting GPU reset events through SMI. KFD
reports both pre-reset and post-reset GPU events.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e230ac11
...@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) ...@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete) if (!kfd->init_complete)
return 0; return 0;
kfd_smi_event_update_gpu_reset(kfd, false);
kfd->dqm->ops.pre_reset(kfd->dqm); kfd->dqm->ops.pre_reset(kfd->dqm);
kgd2kfd_suspend(kfd, false); kgd2kfd_suspend(kfd, false);
...@@ -840,6 +842,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) ...@@ -840,6 +842,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
atomic_set(&kfd->sram_ecc_flag, 0); atomic_set(&kfd->sram_ecc_flag, 0);
kfd_smi_event_update_gpu_reset(kfd, true);
return 0; return 0;
} }
......
...@@ -312,6 +312,8 @@ struct kfd_dev { ...@@ -312,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */ /* Clients watching SMI events */
struct list_head smi_clients; struct list_head smi_clients;
spinlock_t smi_lock; spinlock_t smi_lock;
uint32_t reset_seq_num;
}; };
enum kfd_mempool { enum kfd_mempool {
......
...@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event, ...@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock(); rcu_read_unlock();
} }
/*
 * Queue a GPU reset notification for the SMI clients of @dev.
 *
 * @dev:        device whose smi_clients list and reset_seq_num are used
 * @post_reset: true to emit KFD_SMI_EVENT_GPU_POST_RESET, false to emit
 *              KFD_SMI_EVENT_GPU_PRE_RESET
 *
 * A pre-reset notification bumps dev->reset_seq_num before it is reported;
 * a post-reset notification reports the current value unchanged. When the
 * client list is empty nothing is queued and the sequence number is left
 * untouched.
 */
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
	/*
	 * Message text is "<event> <seq>\n" with both fields in hex:
	 * 1 byte event + 1 byte space + up to 8 bytes seq num +
	 * 1 byte '\n' + 1 byte '\0' = 12
	 */
	char msg[12] = {0};
	unsigned int smi_event;
	int msg_len;

	/* No one is listening, so there is nothing to report. */
	if (list_empty(&dev->smi_clients))
		return;

	smi_event = post_reset ? KFD_SMI_EVENT_GPU_POST_RESET
			       : KFD_SMI_EVENT_GPU_PRE_RESET;

	/* Only the pre-reset message advances the sequence number. */
	if (!post_reset)
		dev->reset_seq_num++;

	msg_len = snprintf(msg, sizeof(msg), "%x %x\n", smi_event,
			   dev->reset_seq_num);

	add_event_to_kfifo(dev, smi_event, msg, msg_len);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask) uint32_t throttle_bitmask)
{ {
...@@ -191,7 +221,7 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, ...@@ -191,7 +221,7 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
if (list_empty(&dev->smi_clients)) if (list_empty(&dev->smi_clients))
return; return;
len = snprintf(fifo_in, 29, "%x %x:%llx\n", len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
atomic64_read(&adev->smu.throttle_int_counter)); atomic64_read(&adev->smu.throttle_int_counter));
...@@ -218,7 +248,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) ...@@ -218,7 +248,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid) if (!task_info.pid)
return; return;
len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT, len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name); task_info.pid, task_info.task_name);
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len); add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
......
...@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); ...@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask); uint32_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif #endif
...@@ -453,6 +453,8 @@ enum kfd_smi_event { ...@@ -453,6 +453,8 @@ enum kfd_smi_event {
KFD_SMI_EVENT_NONE = 0, /* not used */ KFD_SMI_EVENT_NONE = 0, /* not used */
KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
KFD_SMI_EVENT_THERMAL_THROTTLE = 2, KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
KFD_SMI_EVENT_GPU_PRE_RESET = 3,
KFD_SMI_EVENT_GPU_POST_RESET = 4,
}; };
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment