Commit 228ce176, authored by Rajneesh Bhardwaj, committed by Alex Deucher

drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3

[For 1P NPS1 mode driver bringup]

Changes required to initialize the amdgpu driver with frontdoor firmware
loading and discovery=2 with the native mode SBIOS that enables CPU GPU
unified interleaved memory.

sudo modprobe amdgpu discovery=2

Once PSP TMR region is reported via the ACPI interface, the dependency
on the ip_discovery.bin will be removed.

Choice of where to allocate driver table is given to each IP version. In
general, both GTT and VRAM domains will be considered. If one of the
tables has a strict restriction for VRAM domain, then only VRAM domain
is considered.
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
(lijo: Modified the handling for SMU Tables)
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9faf929f
...@@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, ...@@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
(*mem)->dmabuf = dma_buf; (*mem)->dmabuf = dma_buf;
(*mem)->bo = bo; (*mem)->bo = bo;
(*mem)->va = va; (*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0; (*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info; (*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
......
...@@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = { ...@@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = {
int amdgpu_bo_init(struct amdgpu_device *adev) int amdgpu_bo_init(struct amdgpu_device *adev)
{ {
/* On A+A platform, VRAM can be mapped as WB */ /* On A+A platform, VRAM can be mapped as WB */
if (!adev->gmc.xgmi.connected_to_cpu) { if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
/* reserve PAT memory space to WC for VRAM */ /* reserve PAT memory space to WC for VRAM */
int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
adev->gmc.aper_size); adev->gmc.aper_size);
......
...@@ -476,7 +476,8 @@ static int psp_sw_init(void *handle) ...@@ -476,7 +476,8 @@ static int psp_sw_init(void *handle)
return ret; return ret;
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&psp->fence_buf_bo, &psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf_mc_addr,
&psp->fence_buf); &psp->fence_buf);
...@@ -484,7 +485,8 @@ static int psp_sw_init(void *handle) ...@@ -484,7 +485,8 @@ static int psp_sw_init(void *handle)
goto failed1; goto failed1;
ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
(void **)&psp->cmd_buf_mem); (void **)&psp->cmd_buf_mem);
if (ret) if (ret)
......
...@@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ...@@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
} }
ret = amdgpu_bo_create_kernel_at(adev, if (!adev->gmc.is_app_apu) {
adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, ret = amdgpu_bo_create_kernel_at(adev,
adev->mman.discovery_tmr_size, adev->gmc.real_vram_size -
&adev->mman.discovery_memory, adev->mman.discovery_tmr_size,
NULL); adev->mman.discovery_tmr_size,
if (ret) { &adev->mman.discovery_memory,
DRM_ERROR("alloc tmr failed(%d)!\n", ret); NULL);
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); if (ret) {
return ret; DRM_ERROR("alloc tmr failed(%d)!\n", ret);
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
return ret;
}
} else {
DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
} }
return 0; return 0;
...@@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) ...@@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size); adev->gmc.visible_vram_size);
else else if (!adev->gmc.is_app_apu)
#endif #endif
adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
adev->gmc.visible_vram_size); adev->gmc.visible_vram_size);
else
DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n");
#endif #endif
/* /*
...@@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) ...@@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* This is used for VGA emulation and pre-OS scanout buffers to * This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS * avoid display artifacts while transitioning between pre-OS
* and driver. */ * and driver. */
r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, if (!adev->gmc.is_app_apu) {
&adev->mman.stolen_vga_memory, r = amdgpu_bo_create_kernel_at(adev, 0,
NULL); adev->mman.stolen_vga_size,
if (r) &adev->mman.stolen_vga_memory,
return r; NULL);
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, if (r)
adev->mman.stolen_extended_size, return r;
&adev->mman.stolen_extended_memory,
NULL); r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
if (r) adev->mman.stolen_extended_size,
return r; &adev->mman.stolen_extended_memory,
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, NULL);
adev->mman.stolen_reserved_size,
&adev->mman.stolen_reserved_memory, if (r)
NULL); return r;
if (r)
return r; r = amdgpu_bo_create_kernel_at(adev,
adev->mman.stolen_reserved_offset,
adev->mman.stolen_reserved_size,
&adev->mman.stolen_reserved_memory,
NULL);
if (r)
return r;
} else {
DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
}
DRM_INFO("amdgpu: %uM of VRAM memory ready\n", DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
...@@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) ...@@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("Failed initializing oa heap.\n"); DRM_ERROR("Failed initializing oa heap.\n");
return r; return r;
} }
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_bo, NULL,
...@@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ...@@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_ttm_training_reserve_vram_fini(adev); amdgpu_ttm_training_reserve_vram_fini(adev);
/* return the stolen vga memory back to VRAM */ /* return the stolen vga memory back to VRAM */
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); if (!adev->gmc.is_app_apu) {
amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
/* return the IP Discovery TMR memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); /* return the IP Discovery TMR memory back to VRAM */
if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, if (adev->mman.stolen_reserved_size)
NULL, NULL); amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
NULL, NULL);
}
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr); &adev->mman.sdma_access_ptr);
amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev);
...@@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ...@@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
int r; int r;
if (!adev->mman.initialized || amdgpu_in_reset(adev) || if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
adev->mman.buffer_funcs_enabled == enable) adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
return; return;
if (enable) { if (enable) {
......
...@@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.size = amdgpu_vm_pt_size(adev, level); bp.size = amdgpu_vm_pt_size(adev, level);
bp.byte_align = AMDGPU_GPU_PAGE_SIZE; bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
if (!adev->gmc.is_app_apu)
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
else
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC; AMDGPU_GEM_CREATE_CPU_GTT_USWC;
......
...@@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) ...@@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE; adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
if (mec_hpd_size) { if (mec_hpd_size) {
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr, &adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd); (void **)&hpd);
......
...@@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) ...@@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
int r; int r;
/* size in MB on si */ /* size in MB on si */
adev->gmc.mc_vram_size = if (!adev->gmc.is_app_apu) {
adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.mc_vram_size =
adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
} else {
DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
adev->gmc.mc_vram_size = 0;
}
adev->gmc.real_vram_size = adev->gmc.mc_vram_size; adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU) && if (!(adev->flags & AMD_IS_APU) &&
......
...@@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) ...@@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
if (dev->kfd->local_mem_info.local_mem_size_private == 0 && if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
dev->kfd->local_mem_info.local_mem_size_public > 0) dev->kfd->local_mem_info.local_mem_size_public > 0)
return true; return true;
if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) {
pr_debug("APP APU, Consider like a large bar system\n");
return true;
}
return false; return false;
} }
......
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
/* Fixme: Placeholder VRAM size for 1P NPS1 mode bringup.
 * The value is exactly 29 GiB (29 * 1024^3 bytes), not a full 32 GB.
 * The ULL suffix keeps the constant an unsigned 64-bit quantity.
 */
#define DUMMY_VRAM_SIZE 31138512896ULL
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1. * GPU processor ID are expressed with Bit[31]=1.
* The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
...@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, ...@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
props->heap_type = heap_type; props->heap_type = heap_type;
props->flags = flags; props->flags = flags;
if (size_in_bytes == 0)
size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
props->size_in_bytes = size_in_bytes; props->size_in_bytes = size_in_bytes;
props->width = width; props->width = width;
......
...@@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu) ...@@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu)
} }
} }
driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
/* VRAM allocation for driver table */ /* VRAM allocation for driver table */
for (i = 0; i < SMU_TABLE_COUNT; i++) { for (i = 0; i < SMU_TABLE_COUNT; i++) {
if (tables[i].size == 0) if (tables[i].size == 0)
continue; continue;
/* If one of the tables has VRAM domain restriction, keep it in
* VRAM
*/
if ((tables[i].domain &
(AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) ==
AMDGPU_GEM_DOMAIN_VRAM)
driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
if (i == SMU_TABLE_PMSTATUSLOG) if (i == SMU_TABLE_PMSTATUSLOG)
continue; continue;
...@@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu) ...@@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu)
driver_table->size = max_table_size; driver_table->size = max_table_size;
driver_table->align = PAGE_SIZE; driver_table->align = PAGE_SIZE;
driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
ret = amdgpu_bo_create_kernel(adev, ret = amdgpu_bo_create_kernel(adev,
driver_table->size, driver_table->size,
......
...@@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) ...@@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
if (!smu_table->metrics_table) if (!smu_table->metrics_table)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment