drm/amdkfd: CRIU implement gpu_id remapping

When doing a restore on a different node, the gpu_ids on the restore node may be different, but the user space application will still use the original gpu_ids in its ioctl calls. Add code to create a gpu_id mapping so that kfd can determine the actual gpu_id during the user ioctls. Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: David Yat Sin <david.yatsin@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

drm/amdkfd: CRIU implement gpu_id remapping
When doing a restore on a different node, the gpu_ids on the restore node may be different, but the user space application will still use the original gpu_ids in its ioctl calls. Add code to create a gpu_id mapping so that kfd can determine the actual gpu_id during the user ioctls. Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: David Yat Sin <david.yatsin@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
bef153b7 · David Yat Sin · Alex Deucher · 40e8a766 · bef153b7 · bef153b7
Commit bef153b7 authored Apr 09, 2021 by David Yat Sin Committed by Alex Deucher Feb 07, 2022
5 changed files
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -342,11 +342,12 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
 		return -EINVAL;
 	}

-	kfd = kfd_device_by_id(GET_GPU_ID(event_page_offset));
-	if (!kfd) {
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+	if (!pdd) {
 		pr_err("Getting device by id failed in %s\n", __func__);
 		return -EINVAL;
 	}
+	kfd = pdd->dev;

 	pdd = kfd_bind_process_to_device(kfd, p);
 	if (IS_ERR(pdd))
@@ -1094,6 +1095,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
 {
 	struct kfd_hsa_memory_exception_data memory_exception_data;
 	struct vm_area_struct *vma;
+	int user_gpu_id;

 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
@@ -1115,12 +1117,17 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
 		return; /* Process is exiting */
 	}

+	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+	if (unlikely(user_gpu_id == -EINVAL)) {
+		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+		return;
+	}
 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));

 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

-	memory_exception_data.gpu_id = dev->id;
+	memory_exception_data.gpu_id = user_gpu_id;
 	memory_exception_data.va = address;
 	/* Set failure reason */
 	memory_exception_data.failure.NotPresent = 1;
@@ -1196,11 +1203,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
 	uint32_t id;
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 	struct kfd_hsa_memory_exception_data memory_exception_data;
+	int user_gpu_id;

 	if (!p)
 		return; /* Presumably process exited. */
+
+	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+	if (unlikely(user_gpu_id == -EINVAL)) {
+		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+		return;
+	}
+
 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
-	memory_exception_data.gpu_id = dev->id;
+	memory_exception_data.gpu_id = user_gpu_id;
 	memory_exception_data.failure.imprecise = true;
 	/* Set failure reason */
 	if (info) {
@@ -1240,27 +1255,34 @@ void kfd_signal_reset_event(struct kfd_dev *dev)

 	/* Whole gpu reset caused by GPU hang and memory is lost */
 	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
-	hw_exception_data.gpu_id = dev->id;
 	hw_exception_data.memory_lost = 1;
 	hw_exception_data.reset_cause = reset_cause;

 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
 	memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
-	memory_exception_data.gpu_id = dev->id;
 	memory_exception_data.failure.imprecise = true;

 	idx = srcu_read_lock(&kfd_processes_srcu);
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+
+		if (unlikely(user_gpu_id == -EINVAL)) {
+			WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+			continue;
+		}
+
 		mutex_lock(&p->event_mutex);
 		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
 		idr_for_each_entry_continue(&p->event_idr, ev, id) {
 			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
 				ev->hw_exception_data = hw_exception_data;
+				ev->hw_exception_data.gpu_id = user_gpu_id;
 				set_event(ev);
 			}
 			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
 			    reset_cause == KFD_HW_EXCEPTION_ECC) {
 				ev->memory_exception_data = memory_exception_data;
+				ev->memory_exception_data.gpu_id = user_gpu_id;
 				set_event(ev);
 			}
 		}
@@ -1276,18 +1298,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
 	struct kfd_hsa_hw_exception_data hw_exception_data;
 	struct kfd_event *ev;
 	uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	int user_gpu_id;

 	if (!p)
 		return; /* Presumably process exited. */

+	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+	if (unlikely(user_gpu_id == -EINVAL)) {
+		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+		return;
+	}
+
 	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
-	hw_exception_data.gpu_id = dev->id;
+	hw_exception_data.gpu_id = user_gpu_id;
 	hw_exception_data.memory_lost = 1;
 	hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;

 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
 	memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
-	memory_exception_data.gpu_id = dev->id;
+	memory_exception_data.gpu_id = user_gpu_id;
 	memory_exception_data.failure.imprecise = true;

 	mutex_lock(&p->event_mutex);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -774,6 +774,12 @@ struct kfd_process_device {
 	uint64_t faults;
 	uint64_t page_in;
 	uint64_t page_out;
+	/*
+	 * If this process has been checkpointed before, then the user
+	 * application will use the original gpu_id on the
+	 * checkpointed node to refer to this device.
+	 */
+	uint32_t user_gpu_id;
 };

 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -933,6 +939,11 @@ int kfd_process_restore_queues(struct kfd_process *p);
 void kfd_suspend_all_processes(void);
 int kfd_resume_all_processes(void);

+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
+							 uint32_t gpu_id);
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);
+
 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 			       struct file *drm_file);
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1526,6 +1526,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->runtime_inuse = false;
 	pdd->vram_usage = 0;
 	pdd->sdma_past_activity_counter = 0;
+	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
 	p->pdds[p->n_pdds++] = pdd;

@@ -1981,6 +1982,37 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
 	}
 }

+/* Return the pdd of @p whose user_gpu_id is @gpu_id; NULL if none, or if @gpu_id is 0. */
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
+{
+	int idx;
+
+	if (!gpu_id)
+		return NULL;
+
+	for (idx = 0; idx < p->n_pdds; idx++) {
+		if (p->pdds[idx]->user_gpu_id == gpu_id)
+			return p->pdds[idx];
+	}
+	return NULL;
+}
+
+/* Map a real gpu_id to @p's user_gpu_id for that device; 0 maps to 0, -EINVAL if unmatched. */
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
+{
+	int idx;
+
+	if (!actual_gpu_id)
+		return 0;
+
+	for (idx = 0; idx < p->n_pdds; idx++) {
+		if (p->pdds[idx]->dev->id == actual_gpu_id)
+			return p->pdds[idx]->user_gpu_id;
+	}
+
+	return -EINVAL;
+}
+
 #if defined(CONFIG_DEBUG_FS)

 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -610,7 +610,7 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd,
 	mqd = (void *)(q_data + 1);
 	ctl_stack = mqd + q_data->mqd_size;

-	q_data->gpu_id = pdd->dev->id;
+	q_data->gpu_id = pdd->user_gpu_id;
 	q_data->type = q->properties.type;
 	q_data->format = q->properties.format;
 	q_data->q_id =  q->properties.queue_id;
@@ -769,7 +769,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
 	uint64_t q_extra_data_size;
 	struct queue_properties qp;
 	unsigned int queue_id;
-	struct kfd_dev *dev;
 	int ret = 0;

 	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
@@ -807,20 +806,11 @@ int kfd_criu_restore_queue(struct kfd_process *p,

 	*priv_data_offset += q_extra_data_size;

-	dev = kfd_device_by_id(q_data->gpu_id);
-	if (!dev) {
-		pr_err("Could not get kfd_dev from gpu_id = 0x%x\n",
-		q_data->gpu_id);
-
-		ret = -EINVAL;
-		goto exit;
-	}
-
-	pdd = kfd_get_process_device_data(dev, p);
+	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
 	if (!pdd) {
 		pr_err("Failed to get pdd\n");
-		ret = -EFAULT;
-		return ret;
+		ret = -EINVAL;
+		goto exit;
 	}
 	/* data stored in this order: mqd, ctl_stack */
 	mqd = q_extra_data;