Commit b25fdc04 authored by James Zhu's avatar James Zhu Committed by Alex Deucher

drm/amdgpu: skip xcp drm device allocation when out of drm resource

Return 0 when drm device alloc failed with -ENOSPC in
order to  allow amdgpu drive loading. But the xcp without
drm device node assigned won't be visiable in user space.
This helps amdgpu driver loading on system which has more
than 64 nodes, the current limitation.

The proposal to add more drm nodes is discussed in public,
which will support up to 2^20 nodes totally.
kernel drm:
https://lore.kernel.org/lkml/20230724211428.3831636-1-michal.winiarski@intel.com/T/
libdrm:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/305Signed-off-by: default avatarJames Zhu <James.Zhu@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent d621114f
...@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) ...@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
for (i = 1; i < MAX_XCP; i++) { for (i = 1; i < MAX_XCP; i++) {
ret = amdgpu_xcp_drm_dev_alloc(&p_ddev); ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
if (ret) if (ret == -ENOSPC) {
dev_warn(adev->dev,
"Skip xcp node #%d when out of drm node resource.", i);
return 0;
} else if (ret) {
return ret; return ret;
}
/* Redirect all IOCTLs to the primary device */ /* Redirect all IOCTLs to the primary device */
adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev; adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
...@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev, ...@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
return 0; return 0;
for (i = 1; i < MAX_XCP; i++) { for (i = 1; i < MAX_XCP; i++) {
if (!adev->xcp_mgr->xcp[i].ddev)
break;
ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
if (ret) if (ret)
return ret; return ret;
...@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) ...@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
return; return;
for (i = 1; i < MAX_XCP; i++) { for (i = 1; i < MAX_XCP; i++) {
if (!adev->xcp_mgr->xcp[i].ddev)
break;
p_ddev = adev->xcp_mgr->xcp[i].ddev; p_ddev = adev->xcp_mgr->xcp[i].ddev;
drm_dev_unplug(p_ddev); drm_dev_unplug(p_ddev);
p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
......
...@@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu) ...@@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
gpu_id = kfd_generate_gpu_id(gpu); gpu_id = kfd_generate_gpu_id(gpu);
if (gpu->xcp && !gpu->xcp->ddev) {
dev_warn(gpu->adev->dev,
"Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
gpu_id);
return 0;
} else {
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
}
/* Check to see if this gpu device exists in the topology_device_list. /* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device, * If so, assign the gpu to that device,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment