Commit 59d7115d authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdkfd: Move process doorbell allocation into kfd device

Move doorbell allocation for a process into kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space but for some
devices, amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.
Signed-off-by: default avatarMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent b7b6c385
...@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, ...@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return -EINVAL; return -EINVAL;
} }
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE)
return -EINVAL;
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset)
return -ENOMEM;
}
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
pdd = kfd_bind_process_to_device(dev, p); pdd = kfd_bind_process_to_device(dev, p);
...@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, ...@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock; goto err_unlock;
} }
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev)) {
err = -EINVAL;
goto err_unlock;
}
offset = kfd_get_process_doorbells(pdd);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE) {
err = -EINVAL;
goto err_unlock;
}
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset) {
err = -ENOMEM;
goto err_unlock;
}
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size, dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) &mem, &offset, pdd->vm, (struct kgd_mem **) &mem, &offset,
......
...@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, ...@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
atomic_set(&kfd->sram_ecc_flag, 0); atomic_set(&kfd->sram_ecc_flag, 0);
ida_init(&kfd->doorbell_ida);
return kfd; return kfd;
} }
...@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) ...@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_interrupt_exit(kfd); kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd); kfd_topology_remove_device(kfd);
kfd_doorbell_fini(kfd); kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd); kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (kfd->gws) if (kfd->gws)
......
...@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) ...@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
} }
q->properties.doorbell_off = q->properties.doorbell_off =
kfd_get_doorbell_dw_offset_in_bar(dev, q->process, kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
q->doorbell_id); q->doorbell_id);
return 0; return 0;
} }
......
...@@ -31,9 +31,6 @@ ...@@ -31,9 +31,6 @@
* kernel queues using the first doorbell page reserved for the kernel. * kernel queues using the first doorbell page reserved for the kernel.
*/ */
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
/* /*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that * Each device exposes a doorbell aperture, a PCI MMIO aperture that
* receives 32-bit writes that are passed to queues as wptr values. * receives 32-bit writes that are passed to queues as wptr values.
...@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd) ...@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
else else
return -ENOSPC; return -ENOSPC;
if (!max_doorbell_slices || if (!kfd->max_doorbell_slices ||
doorbell_process_limit < max_doorbell_slices) doorbell_process_limit < kfd->max_doorbell_slices)
max_doorbell_slices = doorbell_process_limit; kfd->max_doorbell_slices = doorbell_process_limit;
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset; doorbell_start_offset;
...@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, ...@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
phys_addr_t address; phys_addr_t address;
struct kfd_process_device *pdd;
/* /*
* For simplicitly we only allow mapping of the entire doorbell * For simplicitly we only allow mapping of the entire doorbell
...@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, ...@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL; return -EINVAL;
/* Calculate physical address of doorbell */ pdd = kfd_get_process_device_data(dev, process);
address = kfd_get_process_doorbells(dev, process); if (!pdd)
return -EINVAL;
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(pdd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP; VM_DONTDUMP | VM_PFNMAP;
...@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) ...@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
} }
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process, struct kfd_process_device *pdd,
unsigned int doorbell_id) unsigned int doorbell_id)
{ {
/* /*
...@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, ...@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
* units regardless of the ASIC-dependent doorbell size. * units regardless of the ASIC-dependent doorbell size.
*/ */
return kfd->doorbell_base_dw_offset + return kfd->doorbell_base_dw_offset +
process->doorbell_index pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) + * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
} }
...@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd) ...@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
} }
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
struct kfd_process *process)
{ {
return dev->doorbell_base + return pdd->dev->doorbell_base +
process->doorbell_index * kfd_doorbell_process_slice(dev); pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
} }
int kfd_alloc_process_doorbells(struct kfd_process *process) int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
{ {
int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices, int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
GFP_KERNEL); GFP_KERNEL);
if (r > 0) if (r > 0)
process->doorbell_index = r; *doorbell_index = r;
return r; return r;
} }
void kfd_free_process_doorbells(struct kfd_process *process) void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
{ {
if (process->doorbell_index) if (doorbell_index)
ida_simple_remove(&doorbell_ida, process->doorbell_index); ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
} }
...@@ -314,6 +314,9 @@ struct kfd_dev { ...@@ -314,6 +314,9 @@ struct kfd_dev {
spinlock_t smi_lock; spinlock_t smi_lock;
uint32_t reset_seq_num; uint32_t reset_seq_num;
struct ida doorbell_ida;
unsigned int max_doorbell_slices;
}; };
enum kfd_mempool { enum kfd_mempool {
...@@ -699,6 +702,7 @@ struct kfd_process_device { ...@@ -699,6 +702,7 @@ struct kfd_process_device {
struct attribute attr_evict; struct attribute attr_evict;
struct kobject *kobj_stats; struct kobject *kobj_stats;
unsigned int doorbell_index;
}; };
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
...@@ -736,7 +740,6 @@ struct kfd_process { ...@@ -736,7 +740,6 @@ struct kfd_process {
struct mmu_notifier mmu_notifier; struct mmu_notifier mmu_notifier;
uint16_t pasid; uint16_t pasid;
unsigned int doorbell_index;
/* /*
* List of kfd_process_device structures, * List of kfd_process_device structures,
...@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db); ...@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value); void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value); void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process, struct kfd_process_device *pdd,
unsigned int doorbell_id); unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
struct kfd_process *process); int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
int kfd_alloc_process_doorbells(struct kfd_process *process); unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_process *process); void kfd_free_process_doorbells(struct kfd_dev *kfd,
unsigned int doorbell_index);
/* GTT Sub-Allocator */ /* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
......
...@@ -876,6 +876,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) ...@@ -876,6 +876,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfree(pdd->qpd.doorbell_bitmap); kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr); idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
/* /*
* before destroying pdd, make sure to report availability * before destroying pdd, make sure to report availability
* for auto suspend * for auto suspend
...@@ -932,8 +934,6 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -932,8 +934,6 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_event_free_process(p); kfd_event_free_process(p);
kfd_pasid_free(p->pasid); kfd_pasid_free(p->pasid);
kfd_free_process_doorbells(p);
mutex_destroy(&p->mutex); mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread); put_task_struct(p->lead_thread);
...@@ -1111,9 +1111,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -1111,9 +1111,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (process->pasid == 0) if (process->pasid == 0)
goto err_alloc_pasid; goto err_alloc_pasid;
if (kfd_alloc_process_doorbells(process) < 0)
goto err_alloc_doorbells;
err = pqm_init(&process->pqm, process); err = pqm_init(&process->pqm, process);
if (err != 0) if (err != 0)
goto err_process_pqm_init; goto err_process_pqm_init;
...@@ -1141,8 +1138,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -1141,8 +1138,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
err_init_apertures: err_init_apertures:
pqm_uninit(&process->pqm); pqm_uninit(&process->pqm);
err_process_pqm_init: err_process_pqm_init:
kfd_free_process_doorbells(process);
err_alloc_doorbells:
kfd_pasid_free(process->pasid); kfd_pasid_free(process->pasid);
err_alloc_pasid: err_alloc_pasid:
mutex_destroy(&process->mutex); mutex_destroy(&process->mutex);
...@@ -1205,10 +1200,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, ...@@ -1205,10 +1200,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd) if (!pdd)
return NULL; return NULL;
if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
pr_err("Failed to alloc doorbell for pdd\n");
goto err_free_pdd;
}
if (init_doorbell_bitmap(&pdd->qpd, dev)) { if (init_doorbell_bitmap(&pdd->qpd, dev)) {
pr_err("Failed to init doorbell for process\n"); pr_err("Failed to init doorbell for process\n");
kfree(pdd); goto err_free_pdd;
return NULL;
} }
pdd->dev = dev; pdd->dev = dev;
...@@ -1231,6 +1230,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, ...@@ -1231,6 +1230,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
idr_init(&pdd->alloc_idr); idr_init(&pdd->alloc_idr);
return pdd; return pdd;
err_free_pdd:
kfree(pdd);
return NULL;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment