Commit b408a548, authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: Add support for doorbell BOs

This allows user mode to map doorbell pages into GPUVM address space.
That way GPUs can submit to user mode queues (self-dispatch).
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 1dde0ea9
...@@ -887,6 +887,24 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, ...@@ -887,6 +887,24 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret; return ret;
} }
static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
{
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (!sg)
return NULL;
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
kfree(sg);
return NULL;
}
sg->sgl->dma_address = addr;
sg->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->sgl->dma_length = size;
#endif
return sg;
}
static int process_validate_vms(struct amdkfd_process_info *process_info) static int process_validate_vms(struct amdkfd_process_info *process_info)
{ {
struct amdgpu_vm *peer_vm; struct amdgpu_vm *peer_vm;
...@@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{ {
struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0; uint64_t user_addr = 0;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
...@@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset) if (!offset || !*offset)
return -EINVAL; return -EINVAL;
user_addr = *offset; user_addr = *offset;
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
alloc_flags = 0;
if (size > UINT_MAX)
return -EINVAL;
sg = create_doorbell_sg(*offset, size);
if (!sg)
return -ENOMEM;
} else { } else {
return -EINVAL; return -EINVAL;
} }
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) if (!*mem) {
return -ENOMEM; ret = -ENOMEM;
goto err;
}
INIT_LIST_HEAD(&(*mem)->bo_va_list); INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock); mutex_init(&(*mem)->lock);
(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
...@@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
amdgpu_sync_create(&(*mem)->sync); amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false); ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
if (ret) { if (ret) {
pr_debug("Insufficient system memory\n"); pr_debug("Insufficient system memory\n");
goto err_reserve_limit; goto err_reserve_limit;
...@@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bp.byte_align = byte_align; bp.byte_align = byte_align;
bp.domain = alloc_domain; bp.domain = alloc_domain;
bp.flags = alloc_flags; bp.flags = alloc_flags;
bp.type = ttm_bo_type_device; bp.type = bo_type;
bp.resv = NULL; bp.resv = NULL;
ret = amdgpu_bo_create(adev, &bp, &bo); ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) { if (ret) {
...@@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
domain_string(alloc_domain), ret); domain_string(alloc_domain), ret);
goto err_bo_create; goto err_bo_create;
} }
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
bo->tbo.ttm->sg = sg;
}
bo->kfd_bo = *mem; bo->kfd_bo = *mem;
(*mem)->bo = bo; (*mem)->bo = bo;
if (user_addr) if (user_addr)
...@@ -1290,10 +1326,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1290,10 +1326,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */ /* Don't unreserve system mem limit twice */
goto err_reserve_limit; goto err_reserve_limit;
err_bo_create: err_bo_create:
unreserve_mem_limit(adev, size, alloc_domain, false); unreserve_mem_limit(adev, size, alloc_domain, !!sg);
err_reserve_limit: err_reserve_limit:
mutex_destroy(&(*mem)->lock); mutex_destroy(&(*mem)->lock);
kfree(*mem); kfree(*mem);
err:
if (sg) {
sg_free_table(sg);
kfree(sg);
}
return ret; return ret;
} }
...@@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ...@@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */ /* Free the sync object */
amdgpu_sync_free(&mem->sync); amdgpu_sync_free(&mem->sync);
/* If the SG is not NULL, it's one we created for a doorbell
* BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
kfree(mem->bo->tbo.sg);
}
/* Free the BO*/ /* Free the BO*/
amdgpu_bo_unref(&mem->bo); amdgpu_bo_unref(&mem->bo);
mutex_destroy(&mem->lock); mutex_destroy(&mem->lock);
......
...@@ -1274,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, ...@@ -1274,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return -EINVAL; return -EINVAL;
} }
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
}
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
pdd = kfd_bind_process_to_device(dev, p); pdd = kfd_bind_process_to_device(dev, p);
......
...@@ -188,8 +188,8 @@ struct tile_config { ...@@ -188,8 +188,8 @@ struct tile_config {
*/ */
#define ALLOC_MEM_FLAGS_VRAM (1 << 0) #define ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define ALLOC_MEM_FLAGS_GTT (1 << 1) #define ALLOC_MEM_FLAGS_GTT (1 << 1)
#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ #define ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ #define ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
/* /*
* Allocation flags attributes/access options. * Allocation flags attributes/access options.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment