Commit c99a2e7a authored by Alex Deucher

drm/amdkfd: drop IOMMUv2 support

Now that we use the dGPU path for all APUs, drop the
IOMMUv2 support.

v2: drop the now unused queue manager functions for gfx7/8 APUs
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Tested-by: Mike Lothian <mike@fireburn.co.uk>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2b4adeb3
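The hunks below are easier to follow with the caller-visible effect in mind: the dedicated IOMMUv2 resume step disappears and KFD is resumed only through the regular dGPU path. A minimal sketch of that simplification, condensed from the amdgpu_device.c hunks further down (the *_before/*_after names are labels for this illustration only, not functions in the driver):

```c
/* Before: resume had to bring the IOMMUv2 binding back first. */
static int ip_resume_before(struct amdgpu_device *adev)
{
	int r;

	if (!adev->in_s0ix) {
		r = amdgpu_amdkfd_resume_iommu(adev);	/* dropped by this patch */
		if (r)
			return r;
	}
	return amdgpu_device_ip_resume_phase1(adev);
}

/* After: no separate IOMMU step; phase1 resume is entered directly. */
static int ip_resume_after(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_resume_phase1(adev);
}
```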
@@ -226,16 +226,6 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
 	kgd2kfd_suspend(adev->kfd.dev, run_pm);
 }

-int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
-{
-	int r = 0;
-
-	if (adev->kfd.dev)
-		r = kgd2kfd_resume_iommu(adev->kfd.dev);
-
-	return r;
-}
-
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
 {
 	int r = 0;
...
@@ -148,7 +148,6 @@ int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
-int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
@@ -399,7 +398,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
@@ -439,11 +437,6 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
 }

-static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd)
-{
-	return 0;
-}
-
 static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
 	return 0;
...
@@ -3236,12 +3236,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;

-	if (!adev->in_s0ix) {
-		r = amdgpu_amdkfd_resume_iommu(adev);
-		if (r)
-			return r;
-	}
-
 	r = amdgpu_device_ip_resume_phase1(adev);
 	if (r)
 		return r;
@@ -4946,9 +4940,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 				dev_warn(tmp_adev->dev, "asic atom init failed!");
 			} else {
 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
-				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
-				if (r)
-					goto out;
-
 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
 				if (r)
...
@@ -755,20 +755,6 @@ module_param(debug_largebar, int, 0444);
 MODULE_PARM_DESC(debug_largebar,
 	"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");

-/**
- * DOC: ignore_crat (int)
- * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
- * table to get information about AMD APUs. This option can serve as a workaround on
- * systems with a broken CRAT table.
- *
- * Default is auto (according to asic type, iommu_v2, and crat table, to decide
- * whether use CRAT)
- */
-int ignore_crat;
-module_param(ignore_crat, int, 0444);
-MODULE_PARM_DESC(ignore_crat,
-	"Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
-
 /**
  * DOC: halt_if_hws_hang (int)
  * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
...
@@ -6,7 +6,6 @@
 config HSA_AMD
 	bool "HSA kernel driver for AMD GPU devices"
 	depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
-	imply AMD_IOMMU_V2 if X86_64
 	select HMM_MIRROR
 	select MMU_NOTIFIER
 	select DRM_AMDGPU_USERPTR
...
@@ -59,10 +59,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_crat.o \
 		$(AMDKFD_PATH)/kfd_debug.o

-ifneq ($(CONFIG_AMD_IOMMU_V2),)
-AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
-endif
-
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
 endif
...
@@ -1026,9 +1026,6 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
 		return true;
 	}

-	if (dev->kfd->use_iommu_v2)
-		return false;
-
 	if (dev->local_mem_info.local_mem_size_private == 0 &&
 	    dev->local_mem_info.local_mem_size_public > 0)
 		return true;
...
@@ -26,7 +26,6 @@
 #include "kfd_crat.h"
 #include "kfd_priv.h"
 #include "kfd_topology.h"
-#include "kfd_iommu.h"
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
@@ -1536,72 +1535,6 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
 	return num_of_cache_types;
 }

-static bool kfd_ignore_crat(void)
-{
-	bool ret;
-
-	if (ignore_crat)
-		return true;
-
-	ret = true;
-
-	return ret;
-}
-
-/*
- * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
- * copies CRAT from ACPI (if available).
- * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
- *
- * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
- *	crat_image will be NULL
- * @size: [OUT] size of crat_image
- *
- * Return 0 if successful else return error code
- */
-int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
-{
-	struct acpi_table_header *crat_table;
-	acpi_status status;
-	void *pcrat_image;
-	int rc = 0;
-
-	if (!crat_image)
-		return -EINVAL;
-
-	*crat_image = NULL;
-
-	if (kfd_ignore_crat()) {
-		pr_info("CRAT table disabled by module option\n");
-		return -ENODATA;
-	}
-
-	/* Fetch the CRAT table from ACPI */
-	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
-	if (status == AE_NOT_FOUND) {
-		pr_info("CRAT table not found\n");
-		return -ENODATA;
-	} else if (ACPI_FAILURE(status)) {
-		const char *err = acpi_format_exception(status);
-
-		pr_err("CRAT table error: %s\n", err);
-		return -EINVAL;
-	}
-
-	pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
-	if (!pcrat_image) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	memcpy(pcrat_image, crat_table, crat_table->length);
-	*crat_image = pcrat_image;
-	*size = crat_table->length;
-out:
-	acpi_put_table(crat_table);
-	return rc;
-}
-
 /* Memory required to create Virtual CRAT.
  * Since there is no easy way to predict the amount of memory required, the
  * following amount is allocated for GPU Virtual CRAT. This is
@@ -2169,12 +2102,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	cu->hsa_capability = 0;

-	/* Check if this node supports IOMMU. During parsing this flag will
-	 * translate to HSA_CAP_ATS_PRESENT
-	 */
-	if (!kfd_iommu_check_device(kdev->kfd))
-		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
-
 	crat_table->length += sub_type_hdr->length;
 	crat_table->total_entries++;
...
@@ -307,7 +307,6 @@ struct kfd_gpu_cache_info {
 };
 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);

-int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
 void kfd_destroy_crat_image(void *crat_image);
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 			 uint32_t proximity_domain);
...
@@ -29,7 +29,6 @@
 #include "kfd_pm4_headers_vi.h"
 #include "kfd_pm4_headers_aldebaran.h"
 #include "cwsr_trap_handler.h"
-#include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 #include "kfd_svm.h"
@@ -62,7 +61,6 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
 				unsigned int chunk_size);
 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

-static int kfd_resume_iommu(struct kfd_dev *kfd);
 static int kfd_resume(struct kfd_node *kfd);

 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
@@ -755,15 +753,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->noretry = kfd->adev->gmc.noretry;

-	/* If CRAT is broken, won't set iommu enabled */
-	kfd_double_confirm_iommu_support(kfd);
-
-	if (kfd_iommu_device_init(kfd)) {
-		kfd->use_iommu_v2 = false;
-		dev_err(kfd_device, "Error initializing iommuv2\n");
-		goto device_iommu_error;
-	}
-
 	kfd_cwsr_init(kfd);

 	dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
@@ -838,9 +827,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	svm_range_set_max_pages(kfd->adev);

-	if (kfd_resume_iommu(kfd))
-		goto kfd_resume_iommu_error;
-
 	spin_lock_init(&kfd->watch_points_lock);

 	kfd->init_complete = true;
@@ -852,11 +838,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	goto out;

-kfd_resume_iommu_error:
 node_init_error:
 node_alloc_error:
 	kfd_cleanup_nodes(kfd, i);
-device_iommu_error:
 	kfd_doorbell_fini(kfd);
 kfd_doorbell_error:
 	kfd_gtt_sa_fini(kfd);
@@ -971,7 +955,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 		node = kfd->nodes[i];
 		node->dqm->ops.stop(node->dqm);
 	}
-	kfd_iommu_suspend(kfd);
 }

 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
@@ -1001,26 +984,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 	return ret;
 }

-int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
-{
-	if (!kfd->init_complete)
-		return 0;
-
-	return kfd_resume_iommu(kfd);
-}
-
-static int kfd_resume_iommu(struct kfd_dev *kfd)
-{
-	int err = 0;
-
-	err = kfd_iommu_resume(kfd);
-	if (err)
-		dev_err(kfd_device,
-			"Failed to resume IOMMU for device %x:%x\n",
-			kfd->adev->pdev->vendor, kfd->adev->pdev->device);
-	return err;
-}
-
 static int kfd_resume(struct kfd_node *node)
 {
 	int err = 0;
...
@@ -2560,7 +2560,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
 	switch (dev->adev->asic_type) {
 	case CHIP_KAVERI:
 	case CHIP_HAWAII:
-		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
+		device_queue_manager_init_cik(&dqm->asic_ops);
 		break;
 	case CHIP_CARRIZO:
@@ -2570,7 +2570,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_VEGAM:
-		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
+		device_queue_manager_init_vi(&dqm->asic_ops);
 		break;
 	default:
...
@@ -269,12 +269,8 @@ struct device_queue_manager {
 void device_queue_manager_init_cik(
 		struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_cik_hawaii(
-		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_vi(
 		struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_vi_tonga(
-		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_v9(
 		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_v10_navi10(
...
@@ -34,17 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size);
 static int update_qpd_cik(struct device_queue_manager *dqm,
 			  struct qcm_process_device *qpd);
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
-			struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
-			struct qcm_process_device *qpd);
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
-			struct queue *q,
-			struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd);

 void device_queue_manager_init_cik(
 		struct device_queue_manager_asic_ops *asic_ops)
 {
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
 	asic_ops->update_qpd = update_qpd_cik;
@@ -52,15 +48,6 @@ void device_queue_manager_init_cik(
 	asic_ops->mqd_manager_init = mqd_manager_init_cik;
 }

-void device_queue_manager_init_cik_hawaii(
-		struct device_queue_manager_asic_ops *asic_ops)
-{
-	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
-	asic_ops->update_qpd = update_qpd_cik_hawaii;
-	asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
-	asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
-}
-
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 {
 	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -115,41 +102,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 }

 static int update_qpd_cik(struct device_queue_manager *dqm,
 			  struct qcm_process_device *qpd)
-{
-	struct kfd_process_device *pdd;
-	unsigned int temp;
-
-	pdd = qpd_to_pdd(qpd);
-
-	/* check if sh_mem_config register already configured */
-	if (qpd->sh_mem_config == 0) {
-		qpd->sh_mem_config =
-			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
-			DEFAULT_MTYPE(MTYPE_NONCACHED) |
-			APE1_MTYPE(MTYPE_NONCACHED);
-		qpd->sh_mem_ape1_limit = 0;
-		qpd->sh_mem_ape1_base = 0;
-	}
-
-	if (qpd->pqm->process->is_32bit_user_mode) {
-		temp = get_sh_mem_bases_32(pdd);
-		qpd->sh_mem_bases = SHARED_BASE(temp);
-		qpd->sh_mem_config |= PTR32;
-	} else {
-		temp = get_sh_mem_bases_nybble_64(pdd);
-		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
-		qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
-	}
-
-	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
-		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
-	return 0;
-}
-
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
-		struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
 	unsigned int temp;
@@ -178,25 +131,9 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
 	return 0;
 }

-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
-			struct qcm_process_device *qpd)
-{
-	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
-	if (q->process->is_32bit_user_mode)
-		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
-				get_sh_mem_bases_32(qpd_to_pdd(qpd));
-	else
-		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
-				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
-				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
-	q->properties.sdma_vm_addr = value;
-}
-
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
-			struct queue *q,
-			struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd)
 {
 	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
 	 * aperture addresses.
...
@@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
 	qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
 				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

-	if (dqm->dev->kfd->noretry && !dqm->dev->kfd->use_iommu_v2)
+	if (dqm->dev->kfd->noretry)
 		qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;

 	if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))
...
@@ -28,29 +28,19 @@
 #include "oss/oss_3_0_sh_mask.h"

 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 				   struct qcm_process_device *qpd,
 				   enum cache_policy default_policy,
 				   enum cache_policy alternate_policy,
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size);
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
-		struct qcm_process_device *qpd,
-		enum cache_policy default_policy,
-		enum cache_policy alternate_policy,
-		void __user *alternate_aperture_base,
-		uint64_t alternate_aperture_size);
 static int update_qpd_vi(struct device_queue_manager *dqm,
 			 struct qcm_process_device *qpd);
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
-			struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
-			struct qcm_process_device *qpd);
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
-			struct queue *q,
-			struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd);

 void device_queue_manager_init_vi(
 		struct device_queue_manager_asic_ops *asic_ops)
 {
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
 	asic_ops->update_qpd = update_qpd_vi;
@@ -58,15 +48,6 @@ void device_queue_manager_init_vi(
 	asic_ops->mqd_manager_init = mqd_manager_init_vi;
 }

-void device_queue_manager_init_vi_tonga(
-		struct device_queue_manager_asic_ops *asic_ops)
-{
-	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
-	asic_ops->update_qpd = update_qpd_vi_tonga;
-	asic_ops->init_sdma_vm = init_sdma_vm_tonga;
-	asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
-}
-
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 {
 	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -96,35 +77,6 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 }

 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
-		struct qcm_process_device *qpd,
-		enum cache_policy default_policy,
-		enum cache_policy alternate_policy,
-		void __user *alternate_aperture_base,
-		uint64_t alternate_aperture_size)
-{
-	uint32_t default_mtype;
-	uint32_t ape1_mtype;
-
-	default_mtype = (default_policy == cache_policy_coherent) ?
-			MTYPE_CC :
-			MTYPE_NC;
-
-	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
-			MTYPE_CC :
-			MTYPE_NC;
-
-	qpd->sh_mem_config = (qpd->sh_mem_config &
-			SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
-		SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
-		default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
-		ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
-		SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
-	return true;
-}
-
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd,
 		enum cache_policy default_policy,
 		enum cache_policy alternate_policy,
@@ -152,48 +104,7 @@ static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
 }

 static int update_qpd_vi(struct device_queue_manager *dqm,
 			 struct qcm_process_device *qpd)
-{
-	struct kfd_process_device *pdd;
-	unsigned int temp;
-
-	pdd = qpd_to_pdd(qpd);
-
-	/* check if sh_mem_config register already configured */
-	if (qpd->sh_mem_config == 0) {
-		qpd->sh_mem_config =
-			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
-			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
-			MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
-			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
-		qpd->sh_mem_ape1_limit = 0;
-		qpd->sh_mem_ape1_base = 0;
-	}
-
-	if (qpd->pqm->process->is_32bit_user_mode) {
-		temp = get_sh_mem_bases_32(pdd);
-		qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
-		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
-					SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
-	} else {
-		temp = get_sh_mem_bases_nybble_64(pdd);
-		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
-		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
-					SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
-		qpd->sh_mem_config |= 1 <<
-					SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
-	}
-
-	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
-		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
-	return 0;
-}
-
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
-			struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
 	unsigned int temp;
@@ -226,25 +137,9 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
 	return 0;
 }

-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
-			struct qcm_process_device *qpd)
-{
-	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
-	if (q->process->is_32bit_user_mode)
-		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
-				get_sh_mem_bases_32(qpd_to_pdd(qpd));
-	else
-		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
-				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
-				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
-	q->properties.sdma_vm_addr = value;
-}
-
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
-			struct queue *q,
-			struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd)
 {
 	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
 	 * aperture addresses.
...
@@ -31,7 +31,6 @@
 #include <linux/memory.h>
 #include "kfd_priv.h"
 #include "kfd_events.h"
-#include "kfd_iommu.h"
 #include <linux/device.h>

 /*
@@ -1146,87 +1145,6 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 	rcu_read_unlock();
 }

-#ifdef KFD_SUPPORT_IOMMU_V2
-void kfd_signal_iommu_event(struct kfd_node *dev, u32 pasid,
-		unsigned long address, bool is_write_requested,
-		bool is_execute_requested)
-{
-	struct kfd_hsa_memory_exception_data memory_exception_data;
-	struct vm_area_struct *vma;
-	int user_gpu_id;
-
-	/*
-	 * Because we are called from arbitrary context (workqueue) as opposed
-	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function increments the process ref count.
-	 */
-	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
-	struct mm_struct *mm;
-
-	if (!p)
-		return; /* Presumably process exited. */
-
-	/* Take a safe reference to the mm_struct, which may otherwise
-	 * disappear even while the kfd_process is still referenced.
-	 */
-	mm = get_task_mm(p->lead_thread);
-	if (!mm) {
-		kfd_unref_process(p);
-		return; /* Process is exiting */
-	}
-
-	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
-	if (unlikely(user_gpu_id == -EINVAL)) {
-		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
-		return;
-	}
-
-	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
-
-	mmap_read_lock(mm);
-	vma = find_vma(mm, address);
-
-	memory_exception_data.gpu_id = user_gpu_id;
-	memory_exception_data.va = address;
-	/* Set failure reason */
-	memory_exception_data.failure.NotPresent = 1;
-	memory_exception_data.failure.NoExecute = 0;
-	memory_exception_data.failure.ReadOnly = 0;
-	if (vma && address >= vma->vm_start) {
-		memory_exception_data.failure.NotPresent = 0;
-
-		if (is_write_requested && !(vma->vm_flags & VM_WRITE))
-			memory_exception_data.failure.ReadOnly = 1;
-		else
-			memory_exception_data.failure.ReadOnly = 0;
-
-		if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
-			memory_exception_data.failure.NoExecute = 1;
-		else
-			memory_exception_data.failure.NoExecute = 0;
-	}
-
-	mmap_read_unlock(mm);
-	mmput(mm);
-
-	pr_debug("notpresent %d, noexecute %d, readonly %d\n",
-			memory_exception_data.failure.NotPresent,
-			memory_exception_data.failure.NoExecute,
-			memory_exception_data.failure.ReadOnly);
-
-	/* Workaround on Raven to not kill the process when memory is freed
-	 * before IOMMU is able to finish processing all the excessive PPRs
-	 */
-	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
-	    KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
-	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
-		lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
-				&memory_exception_data);
-
-	kfd_unref_process(p);
-}
-#endif /* KFD_SUPPORT_IOMMU_V2 */
-
 void kfd_signal_hw_exception_event(u32 pasid)
 {
 	/*
...
@@ -322,28 +322,19 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
 	pdd->lds_base = MAKE_LDS_APP_BASE_VI();
 	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);

-	if (!pdd->dev->kfd->use_iommu_v2) {
-		/* dGPUs: SVM aperture starting at 0
-		 * with small reserved space for kernel.
-		 * Set them to CANONICAL addresses.
-		 */
-		pdd->gpuvm_base = SVM_USER_BASE;
-		pdd->gpuvm_limit =
-			pdd->dev->kfd->shared_resources.gpuvm_size - 1;
-	} else {
-		/* set them to non CANONICAL addresses, and no SVM is
-		 * allocated.
-		 */
-		pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
-		pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
-				pdd->dev->kfd->shared_resources.gpuvm_size);
-	}
+	/* dGPUs: SVM aperture starting at 0
+	 * with small reserved space for kernel.
+	 * Set them to CANONICAL addresses.
+	 */
+	pdd->gpuvm_base = SVM_USER_BASE;
+	pdd->gpuvm_limit =
+		pdd->dev->kfd->shared_resources.gpuvm_size - 1;

 	/* dGPUs: the reserved space for kernel
 	 * before SVM
 	 */
 	pdd->qpd.cwsr_base = SVM_CWSR_BASE;
 	pdd->qpd.ib_base = SVM_IB_BASE;

 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
@@ -361,13 +352,11 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
 	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
 	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);

-	if (!pdd->dev->kfd->use_iommu_v2) {
-		/*
-		 * Place TBA/TMA on opposite side of VM hole to prevent
-		 * stray faults from triggering SVM on these pages.
-		 */
-		pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
-	}
+	/*
+	 * Place TBA/TMA on opposite side of VM hole to prevent
+	 * stray faults from triggering SVM on these pages.
+	 */
+	pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
 }

 int kfd_init_apertures(struct kfd_process *process)
...
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/kconfig.h>
#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
/** kfd_iommu_check_device - Check whether IOMMU is available for device
*/
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->use_iommu_v2)
return -ENODEV;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
if (err)
return err;
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;
return 0;
}
/** kfd_iommu_device_init - Initialize IOMMU for device
*/
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
if (!kfd->use_iommu_v2)
return 0;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return -ENODEV;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device,
"error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return -ENODEV;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return -EBUSY;
}
return 0;
}
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
*
* Binds the given process to the given device using its PASID. This
* enables IOMMUv2 address translation for the process on the device.
*
* This function assumes that the process mutex is held.
*/
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
struct kfd_node *dev = pdd->dev;
struct kfd_process *p = pdd->process;
int err;
if (!dev->kfd->use_iommu_v2 || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return -EINVAL;
}
if (!kfd_is_first_node(dev)) {
dev_warn_once(kfd_device,
"IOMMU supported only on first node\n");
return 0;
}
err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread);
if (!err)
pdd->bound = PDD_BOUND;
return err;
}
/** kfd_iommu_unbind_process - Unbind process from all devices
*
* This removes all IOMMU device bindings of the process. To be used
* before process termination.
*/
void kfd_iommu_unbind_process(struct kfd_process *p)
{
int i;
for (i = 0; i < p->n_pdds; i++)
if ((p->pdds[i]->bound == PDD_BOUND) &&
(kfd_is_first_node((p->pdds[i]->dev))))
amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev,
p->pasid);
}
/* Callback for process shutdown invoked by the IOMMU driver */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
{
struct kfd_node *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
struct kfd_process_device *pdd;
if (!dev)
return;
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
* is a bug. Unfortunately, there is no way to tell...
*/
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
* when PASID is still bound.
*/
kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
kfd_unref_process(p);
}
/* This function called by IOMMU driver on PPR failure */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
unsigned long address, u16 flags)
{
struct kfd_node *dev;
dev_warn_ratelimited(kfd_device,
"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
/*
* Bind processes do the device that have been temporarily unbound
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
*/
static int kfd_bind_processes_to_device(struct kfd_node *knode)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int err = 0;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(knode, p);
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}
err = amd_iommu_bind_pasid(knode->adev->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid 0x%x binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
}
pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return err;
}
/*
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* processes will be restored to PDD_BOUND state in
* kfd_bind_processes_to_device.
*/
static void kfd_unbind_processes_from_device(struct kfd_node *knode)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(knode, p);
if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
/** kfd_iommu_suspend - Prepare IOMMU for suspend
*
* This unbinds processes from the device and disables the IOMMU for
* the device.
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->use_iommu_v2)
return;
kfd_unbind_processes_from_device(kfd->nodes[0]);
amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
amd_iommu_free_device(kfd->adev->pdev);
}
/** kfd_iommu_resume - Restore IOMMU after resume
*
* This reinitializes the IOMMU for the device and re-binds previously
* suspended processes to the device.
*/
int kfd_iommu_resume(struct kfd_dev *kfd)
{
unsigned int pasid_limit;
int err;
if (!kfd->use_iommu_v2)
return 0;
pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd->nodes[0]);
if (err) {
amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
amd_iommu_free_device(kfd->adev->pdev);
return err;
}
return 0;
}
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
*/
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;
if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
return 0;
if (!amd_iommu_pc_supported())
return 0;
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
return 0;
}
#endif
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __KFD_IOMMU_H__
#define __KFD_IOMMU_H__
#include <linux/kconfig.h>
#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
#define KFD_SUPPORT_IOMMU_V2
int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
void kfd_iommu_unbind_process(struct kfd_process *p);
void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
#else
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
#if IS_MODULE(CONFIG_AMD_IOMMU_V2)
WARN_ONCE(1, "iommu_v2 module is not usable by built-in KFD");
#endif
return 0;
}
static inline int kfd_iommu_bind_process_to_device(
struct kfd_process_device *pdd)
{
return 0;
}
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
/* empty */
}
static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
/* empty */
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
return 0;
}
static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
return 0;
}
#endif /* IS_REACHABLE(CONFIG_AMD_IOMMU_V2) */
#endif /* __KFD_IOMMU_H__ */
@@ -206,13 +206,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
 	q->is_active = QUEUE_IS_ACTIVE(*q);
 }

-static void update_mqd(struct mqd_manager *mm, void *mqd,
-			struct queue_properties *q,
-			struct mqd_update_info *minfo)
-{
-	__update_mqd(mm, mqd, q, minfo, 1);
-}
-
 static uint32_t read_doorbell_id(void *mqd)
 {
 	struct cik_mqd *m = (struct cik_mqd *)mqd;
@@ -220,9 +213,9 @@ static uint32_t read_doorbell_id(void *mqd)
 	return m->queue_doorbell_id0;
 }

-static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
+static void update_mqd(struct mqd_manager *mm, void *mqd,
 			struct queue_properties *q,
 			struct mqd_update_info *minfo)
 {
 	__update_mqd(mm, mqd, q, minfo, 0);
 }
@@ -387,7 +380,6 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
 #endif

 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		struct kfd_node *dev)
 {
@@ -470,16 +462,3 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 	return mqd;
 }

-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
-		struct kfd_node *dev)
-{
-	struct mqd_manager *mqd;
-
-	mqd = mqd_manager_init_cik(type, dev);
-	if (!mqd)
-		return NULL;
-	if (type == KFD_MQD_TYPE_CP)
-		mqd->update_mqd = update_mqd_hawaii;
-	return mqd;
-}
@@ -237,14 +237,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
 	q->is_active = QUEUE_IS_ACTIVE(*q);
 }

-static void update_mqd(struct mqd_manager *mm, void *mqd,
-			struct queue_properties *q,
-			struct mqd_update_info *minfo)
-{
-	__update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
-}
-
 static uint32_t read_doorbell_id(void *mqd)
 {
 	struct vi_mqd *m = (struct vi_mqd *)mqd;
@@ -252,9 +244,9 @@ static uint32_t read_doorbell_id(void *mqd)
 	return m->queue_doorbell_id0;
 }

-static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
+static void update_mqd(struct mqd_manager *mm, void *mqd,
 			struct queue_properties *q,
 			struct mqd_update_info *minfo)
 {
 	__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
 }
@@ -529,16 +521,3 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 	return mqd;
 }

-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
-		struct kfd_node *dev)
-{
-	struct mqd_manager *mqd;
-
-	mqd = mqd_manager_init_vi(type, dev);
-	if (!mqd)
-		return NULL;
-	if (type == KFD_MQD_TYPE_CP)
-		mqd->update_mqd = update_mqd_tonga;
-	return mqd;
-}
@@ -175,12 +175,6 @@ extern int send_sigterm;
  */
 extern int debug_largebar;

-/*
- * Ignore CRAT table during KFD initialization, can be used to work around
- * broken CRAT tables on some AMD systems
- */
-extern int ignore_crat;
-
 /* Set sh_mem_config.retry_disable on GFX v9 */
 extern int amdgpu_noretry;
@@ -234,7 +228,6 @@ struct kfd_device_info {
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
 	bool supports_cwsr;
-	bool needs_iommu_device;
 	bool needs_pci_atomics;
 	uint32_t no_atomic_fw_version;
 	unsigned int num_sdma_queues_per_engine;
@@ -357,9 +350,6 @@ struct kfd_dev {
 	bool pci_atomic_requested;

-	/* Use IOMMU v2 flag */
-	bool use_iommu_v2;
-
 	/* Compute Profile ref. count */
 	atomic_t compute_profile;
@@ -1149,7 +1139,6 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
 }
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
 int kfd_numa_node_to_apic_id(int numa_node_id);
-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);

 /* Interrupts */
 #define KFD_IRQ_FENCE_CLIENTID	0xff
@@ -1296,12 +1285,8 @@ void print_queue(struct queue *q);
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		struct kfd_node *dev);
-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
-		struct kfd_node *dev);
 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		struct kfd_node *dev);
-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
-		struct kfd_node *dev);
 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		struct kfd_node *dev);
 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
@@ -1456,9 +1441,6 @@ int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t *wait_result);
 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
 				uint32_t valid_id_bits);
-void kfd_signal_iommu_event(struct kfd_node *dev,
-			    u32 pasid, unsigned long address,
-			    bool is_write_requested, bool is_execute_requested);
 void kfd_signal_hw_exception_event(u32 pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
...
@@ -28,7 +28,6 @@
 #include <linux/sched/task.h>
 #include <linux/mmu_context.h>
 #include <linux/slab.h>
-#include <linux/amd-iommu.h>
 #include <linux/notifier.h>
 #include <linux/compat.h>
 #include <linux/mman.h>
@@ -41,7 +40,6 @@ struct mm_struct;
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
-#include "kfd_iommu.h"
 #include "kfd_svm.h"
 #include "kfd_smi_events.h"
 #include "kfd_debug.h"
@@ -1122,7 +1120,6 @@ static void kfd_process_wq_release(struct work_struct *work)
 	dma_fence_signal(p->ef);

 	kfd_process_remove_sysfs(p);
-	kfd_iommu_unbind_process(p);

 	kfd_process_kunmap_signal_bo(p);
 	kfd_process_free_outstanding_kfd_bos(p);
@@ -1728,10 +1725,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
 		}
 	}

-	err = kfd_iommu_bind_process_to_device(pdd);
-	if (err)
-		goto out;
-
 	/*
 	 * make sure that runtime_usage counter is incremented just once
 	 * per pdd
@@ -1739,15 +1732,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
 	pdd->runtime_inuse = true;

 	return pdd;
-
-out:
-	/* balance runpm reference count and exit with error */
-	if (!pdd->runtime_inuse) {
-		pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev);
-		pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
-	}
-
-	return ERR_PTR(err);
 }

 /* Create specific handle mapped to mem from process local memory idr
...
@@ -36,7 +36,6 @@
 #include "kfd_crat.h"
 #include "kfd_topology.h"
 #include "kfd_device_queue_manager.h"
-#include "kfd_iommu.h"
 #include "kfd_svm.h"
 #include "kfd_debug.h"
 #include "amdgpu_amdkfd.h"
@@ -989,17 +988,6 @@ static void find_system_memory(const struct dmi_header *dm,
 	}
 }

-/*
- * Performance counters information is not part of CRAT but we would like to
- * put them in the sysfs under topology directory for Thunk to get the data.
- * This function is called before updating the sysfs.
- */
-static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
-{
-	/* These are the only counters supported so far */
-	return kfd_iommu_add_perf_counters(kdev);
-}
-
 /* kfd_add_non_crat_information - Add information that is not currently
  *	defined in CRAT but is necessary for KFD topology
  * @dev - topology device to which addition info is added
@@ -1014,25 +1002,6 @@ static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
 	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
 }

-/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
- *	Ignore CRAT for all other devices. AMD APU is identified if both CPU
- *	and GPU cores are present.
- * @device_list - topology device list created by parsing ACPI CRAT table.
- * @return - TRUE if invalid, FALSE is valid.
- */
-static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
-{
-	struct kfd_topology_device *dev;
-
-	list_for_each_entry(dev, device_list, list) {
-		if (dev->node_props.cpu_cores_count &&
-		    dev->node_props.simd_count)
-			return false;
-	}
-	pr_info("Ignoring ACPI CRAT on non-APU system\n");
-	return true;
-}
-
 int kfd_topology_init(void)
 {
 	void *crat_image = NULL;
@@ -1063,48 +1032,25 @@ int kfd_topology_init(void)
 	 */
 	proximity_domain = 0;

-	/*
-	 * Get the CRAT image from the ACPI. If ACPI doesn't have one
-	 * or if ACPI CRAT is invalid create a virtual CRAT.
-	 * NOTE: The current implementation expects all AMD APUs to have
-	 *	CRAT. If no CRAT is available, it is assumed to be a CPU
-	 */
-	ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
-	if (!ret) {
-		ret = kfd_parse_crat_table(crat_image,
-					   &temp_topology_device_list,
-					   proximity_domain);
-		if (ret ||
-		    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
-			kfd_release_topology_device_list(
-				&temp_topology_device_list);
-			kfd_destroy_crat_image(crat_image);
-			crat_image = NULL;
-		}
-	}
-
-	if (!crat_image) {
-		ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
-						    COMPUTE_UNIT_CPU, NULL,
-						    proximity_domain);
-		cpu_only_node = 1;
-		if (ret) {
-			pr_err("Error creating VCRAT table for CPU\n");
-			return ret;
-		}
-
-		ret = kfd_parse_crat_table(crat_image,
-					   &temp_topology_device_list,
-					   proximity_domain);
-		if (ret) {
-			pr_err("Error parsing VCRAT table for CPU\n");
-			goto err;
-		}
-	}
+	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+					    COMPUTE_UNIT_CPU, NULL,
+					    proximity_domain);
+	cpu_only_node = 1;
+	if (ret) {
+		pr_err("Error creating VCRAT table for CPU\n");
+		return ret;
+	}
+
+	ret = kfd_parse_crat_table(crat_image,
+				   &temp_topology_device_list,
+				   proximity_domain);
+	if (ret) {
+		pr_err("Error parsing VCRAT table for CPU\n");
+		goto err;
+	}

 	kdev = list_first_entry(&temp_topology_device_list,
 				struct kfd_topology_device, list);

-	kfd_add_perf_to_topology(kdev);
-
 	down_write(&topology_lock);
 	kfd_topology_update_device_list(&temp_topology_device_list,
@@ -1190,8 +1136,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu)
 		/* Discrete GPUs need their own topology device list
 		 * entries. Don't assign them to CPU/APU nodes.
 		 */
-		if (!gpu->kfd->use_iommu_v2 &&
-		    dev->node_props.cpu_cores_count)
+		if (dev->node_props.cpu_cores_count)
 			continue;

 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
@@ -2081,10 +2026,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 	 * Overwrite ATS capability according to needs_iommu_device to fix
 	 * potential missing corresponding bit in CRAT of BIOS.
 	 */
-	if (dev->gpu->kfd->use_iommu_v2)
-		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
-	else
-		dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+	dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;

 	/* Fix errors in CZ CRAT.
 	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
@@ -2279,29 +2221,6 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
 	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
 }

-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
-{
-	struct kfd_topology_device *dev;
-
-	gpu->use_iommu_v2 = false;
-
-	if (!gpu->device_info.needs_iommu_device)
-		return;
-
-	down_read(&topology_lock);
-
-	/* Only use IOMMUv2 if there is an APU topology node with no GPU
-	 * assigned yet. This GPU will be assigned to it.
-	 */
-	list_for_each_entry(dev, &topology_device_list, list)
-		if (dev->node_props.cpu_cores_count &&
-		    dev->node_props.simd_count &&
-		    !dev->gpu)
-			gpu->use_iommu_v2 = true;
-
-	up_read(&topology_lock);
-}
-
 #if defined(CONFIG_DEBUG_FS)

 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
...