Commit 1b4670f6, authored by Oak Zeng, committed by Alex Deucher

drm/amdkfd: Introduce XGMI SDMA queue type

Existing QUEUE_TYPE_SDMA means PCIe optimized SDMA queues.
Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized
for non-PCIe transfer such as XGMI.
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 065e4bdf
...@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, ...@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_COMPUTE; q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
q_properties->type = KFD_QUEUE_TYPE_SDMA; q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else else
return -ENOTSUPP; return -ENOTSUPP;
......
...@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { ...@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true, .needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { ...@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true, .needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { ...@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true, .needs_iommu_device = true,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 1, .num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
#endif #endif
...@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { ...@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { ...@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { ...@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { ...@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { ...@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { ...@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { ...@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = { ...@@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = { ...@@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = { ...@@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = { ...@@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2, .num_sdma_queues_per_engine = 2,
}; };
...@@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = { ...@@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = {
.needs_iommu_device = false, .needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
.num_sdma_engines = 2, .num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8, .num_sdma_queues_per_engine = 8,
}; };
......
...@@ -181,10 +181,12 @@ struct device_queue_manager { ...@@ -181,10 +181,12 @@ struct device_queue_manager {
unsigned int processes_count; unsigned int processes_count;
unsigned int queue_count; unsigned int queue_count;
unsigned int sdma_queue_count; unsigned int sdma_queue_count;
unsigned int xgmi_sdma_queue_count;
unsigned int total_queue_count; unsigned int total_queue_count;
unsigned int next_pipe_to_allocate; unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues; unsigned int *allocated_queues;
uint64_t sdma_bitmap; uint64_t sdma_bitmap;
uint64_t xgmi_sdma_bitmap;
unsigned int vmid_bitmap; unsigned int vmid_bitmap;
uint64_t pipelines_addr; uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem; struct kfd_mem_obj *pipeline_mem;
...@@ -216,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); ...@@ -216,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{ {
......
...@@ -175,6 +175,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, ...@@ -175,6 +175,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi; queue_type__mes_map_queues__debug_interface_queue_vi;
break; break;
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id + packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi; engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */ use_static = false; /* no static queues under SDMA */
...@@ -221,6 +222,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, ...@@ -221,6 +222,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute; engine_sel__mes_unmap_queues__compute;
break; break;
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine; engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break; break;
......
...@@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, ...@@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi; queue_type__mes_map_queues__debug_interface_queue_vi;
break; break;
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id + packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi; engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */ use_static = false; /* no static queues under SDMA */
...@@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, ...@@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute; engine_sel__mes_unmap_queues__compute;
break; break;
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine; engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break; break;
......
...@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, ...@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
process_count = pm->dqm->processes_count; process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count; queue_count = pm->dqm->queue_count;
compute_queue_count = queue_count - pm->dqm->sdma_queue_count; compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
pm->dqm->xgmi_sdma_queue_count;
/* check if there is over subscription /* check if there is over subscription
* Note: the arbitration between the number of VMIDs and * Note: the arbitration between the number of VMIDs and
......
...@@ -188,6 +188,7 @@ struct kfd_device_info { ...@@ -188,6 +188,7 @@ struct kfd_device_info {
bool needs_iommu_device; bool needs_iommu_device;
bool needs_pci_atomics; bool needs_pci_atomics;
unsigned int num_sdma_engines; unsigned int num_sdma_engines;
unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine; unsigned int num_sdma_queues_per_engine;
}; };
...@@ -329,7 +330,8 @@ enum kfd_queue_type { ...@@ -329,7 +330,8 @@ enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_HIQ,
KFD_QUEUE_TYPE_DIQ KFD_QUEUE_TYPE_DIQ,
KFD_QUEUE_TYPE_SDMA_XGMI
}; };
enum kfd_queue_format { enum kfd_queue_format {
......
...@@ -186,8 +186,13 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -186,8 +186,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) { switch (type) {
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { case KFD_QUEUE_TYPE_SDMA_XGMI:
pr_err("Over-subscription is not allowed for SDMA.\n"); if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
>= get_num_sdma_queues(dev->dqm)) ||
(type == KFD_QUEUE_TYPE_SDMA_XGMI &&
dev->dqm->xgmi_sdma_queue_count
>= get_num_xgmi_sdma_queues(dev->dqm))) {
pr_debug("Over-subscription is not allowed for SDMA.\n");
retval = -EPERM; retval = -EPERM;
goto err_create_queue; goto err_create_queue;
} }
...@@ -446,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -446,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q = pqn->q; q = pqn->q;
switch (q->properties.type) { switch (q->properties.type) {
case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
seq_printf(m, " SDMA queue on device %x\n", seq_printf(m, " SDMA queue on device %x\n",
q->device->id); q->device->id);
mqd_type = KFD_MQD_TYPE_SDMA; mqd_type = KFD_MQD_TYPE_SDMA;
......
...@@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args { ...@@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args {
}; };
/* For kfd_ioctl_create_queue_args.queue_type. */ /* For kfd_ioctl_create_queue_args.queue_type. */
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0 #define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
#define KFD_IOC_QUEUE_TYPE_SDMA 1 #define KFD_IOC_QUEUE_TYPE_SDMA 0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15 #define KFD_MAX_QUEUE_PRIORITY 15
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment