Commit 063e33c5, authored by Alex Sierra, committed by Alex Deucher

drm/amdkfd: add xnack enabled flag to kfd_process

XNACK mode controls the SQ RETRY_DISABLE setting, which determines
whether recoverable page faults can be supported on GFXv9 hardware.
Only on Aldebaran we can support different processes running with
different XNACK modes. On older chips all processes must use the same
RETRY_DISABLE setting. However, processes not relying on recoverable
page faults can work with RETRY enabled. This means XNACK off is always
available as a fallback so we can use the same mode on all GPUs in a
process.
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9705c85f
...@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm, ...@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config = qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (dqm->dev->noretry &&
!dqm->dev->use_iommu_v2) if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
/* Aldebaran can safely support different XNACK modes
* per process
*/
if (!pdd->process->xnack_enabled)
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
} else if (dqm->dev->noretry &&
!dqm->dev->use_iommu_v2) {
qpd->sh_mem_config |= qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
}
qpd->sh_mem_ape1_limit = 0; qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0; qpd->sh_mem_ape1_base = 0;
......
...@@ -824,6 +824,8 @@ struct kfd_process { ...@@ -824,6 +824,8 @@ struct kfd_process {
/* shared virtual memory registered by this process */ /* shared virtual memory registered by this process */
struct svm_range_list svms; struct svm_range_list svms;
bool svm_disabled; bool svm_disabled;
bool xnack_enabled;
}; };
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
...@@ -883,6 +885,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, ...@@ -883,6 +885,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p); struct kfd_process *p);
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma); struct vm_area_struct *vma);
......
...@@ -1193,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, ...@@ -1193,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
} }
} }
/* Decide whether process @p can run with XNACK (retry) enabled on all
 * of its GPUs.
 *
 * Background: on most GFXv9 GPUs the SQ retry mode has to match the
 * boot time retry setting, and mixing processes with different
 * XNACK/retry settings can hang the GPU. GPUs may differ in their
 * noretry setting because of HW bugs or limitations, so a mode that is
 * compatible with every GPU of the process is needed. Code built for
 * XNACK-off also runs on GPUs with retry enabled (noretry=0), possibly
 * slower on GFXv9, which makes XNACK-off the universal fallback.
 *
 * @p:         process whose per-device entries p->pdds[0..n_pdds-1] are
 *             inspected
 * @supported: when true, Aldebaran devices are skipped, since they can
 *             select the XNACK mode per process; when false, they are
 *             evaluated like any other device so that dev->noretry
 *             still influences the default mode
 *
 * Returns true if no inspected device rules out XNACK, false as soon as
 * one does.
 */
bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
{
	int idx = 0;

	while (idx < p->n_pdds) {
		struct kfd_dev *gpu = p->pdds[idx++]->dev;

		/* Two kinds of device have no say in the decision:
		 *  - anything older than GFXv9, which does not support the
		 *    SVM APIs at all;
		 *  - Aldebaran, but only when the caller asks what is
		 *    supported rather than what the default should be.
		 */
		if (gpu->device_info->asic_family < CHIP_VEGA10 ||
		    (supported &&
		     gpu->device_info->asic_family == CHIP_ALDEBARAN))
			continue;

		/* XNACK is ruled out by:
		 *  - GFXv10 and later, which cannot preempt shaders during
		 *    page faults; that can lead to poor QoS for queue
		 *    management and memory-manager-related preemptions, or
		 *    even deadlocks;
		 *  - any remaining device that has noretry set.
		 */
		if (gpu->device_info->asic_family >= CHIP_NAVI10 ||
		    gpu->noretry)
			return false;
	}

	/* No device objected, so XNACK can be enabled */
	return true;
}
/* /*
* On return the kfd_process is fully operational and will be freed when the * On return the kfd_process is fully operational and will be freed when the
* mm is released * mm is released
...@@ -1232,6 +1282,9 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -1232,6 +1282,9 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0) if (err != 0)
goto err_init_apertures; goto err_init_apertures;
/* Check XNACK support after PDDs are created in kfd_init_apertures */
process->xnack_enabled = kfd_process_xnack_mode(process, false);
err = svm_range_list_init(process); err = svm_range_list_init(process);
if (err) if (err)
goto err_init_svm_range_list; goto err_init_svm_range_list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment