Commit 4f949033, authored by Lang Yu, committed by Alex Deucher

drm/amdgpu: add PSP loading support for UMSCH

Add front door loading support.
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Veerabadhran Gopalakrishnan <Veerabadhran.Gopalakrishnan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 40748f9a
......@@ -2399,6 +2399,15 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_VPE:
*type = GFX_FW_TYPE_VPE;
break;
case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
*type = GFX_FW_TYPE_UMSCH_UCODE;
break;
case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
*type = GFX_FW_TYPE_UMSCH_DATA;
break;
case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
*type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
......
......@@ -664,6 +664,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "DMCUB";
case AMDGPU_UCODE_ID_CAP:
return "CAP";
case AMDGPU_UCODE_ID_VPE_CTX:
return "VPE_CTX";
case AMDGPU_UCODE_ID_VPE_CTL:
return "VPE_CTL";
case AMDGPU_UCODE_ID_VPE:
return "VPE";
case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
return "UMSCH_MM_UCODE";
case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
return "UMSCH_MM_DATA";
default:
return "UNKNOWN UCODE";
}
......@@ -750,6 +760,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
u8 *ucode_addr;
if (!ucode->fw)
......@@ -962,6 +973,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(vpe_hdr->ctl_ucode_offset);
break;
case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
break;
default:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
......
......@@ -507,6 +507,9 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VPE_CTX,
AMDGPU_UCODE_ID_VPE_CTL,
AMDGPU_UCODE_ID_VPE,
AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
AMDGPU_UCODE_ID_UMSCH_MM_DATA,
AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
AMDGPU_UCODE_ID_MAXIMUM,
};
......
......@@ -76,6 +76,17 @@ struct umsch_mm_test {
uint32_t num_queues;
};
/**
 * umsch_mm_psp_update_sram - submit the UMSCH command buffer through PSP
 * @adev: device whose umsch_mm.cmd_buf_gpu_addr is handed to PSP
 * @ucode_size: value stored in the descriptor's ucode_size field
 *
 * Fills a temporary firmware descriptor tagged
 * AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER with the command buffer GPU address
 * and @ucode_size, then passes it to psp_execute_ip_fw_load().  All other
 * descriptor fields are left zero-initialized.
 *
 * Return: whatever psp_execute_ip_fw_load() returns.
 */
int umsch_mm_psp_update_sram(struct amdgpu_device *adev, u32 ucode_size)
{
	struct amdgpu_firmware_info cmd_buf_fw = {
		.ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
	};
	int ret;

	cmd_buf_fw.mc_addr = adev->umsch_mm.cmd_buf_gpu_addr;
	cmd_buf_fw.ucode_size = ucode_size;

	ret = psp_execute_ip_fw_load(&adev->psp, &cmd_buf_fw);

	return ret;
}
static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t addr, uint32_t size)
......@@ -600,6 +611,22 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
struct amdgpu_firmware_info *info;
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
info->fw = adev->umsch_mm.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
info->fw = adev->umsch_mm.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
}
return 0;
}
......@@ -667,6 +694,17 @@ int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
return 0;
}
/**
 * amdgpu_umsch_mm_add_cmd - append one register-write entry to a buffer
 * @umsch: not used by this function
 * @cmd_ptr: location where the entry is written
 * @reg_offset: register offset; stored shifted left by two bits
 *              (presumably a dword offset converted to bytes - confirm)
 * @reg_data: value stored immediately after the shifted offset
 *
 * Writes two consecutive 32-bit words at @cmd_ptr.  No bounds checking is
 * done here, so the caller must make sure both words fit.
 *
 * Return: pointer to the first word after the entry just written.
 */
void *amdgpu_umsch_mm_add_cmd(struct amdgpu_umsch_mm *umsch,
			      void *cmd_ptr, uint32_t reg_offset, uint32_t reg_data)
{
	uint32_t *entry = cmd_ptr;

	entry[0] = reg_offset << 2;
	entry[1] = reg_data;

	return entry + 2;
}
static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
{
uint32_t umsch_mm_agdb_start;
......@@ -697,6 +735,17 @@ static int umsch_mm_init(struct amdgpu_device *adev)
adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
(adev->umsch_mm.wb_index * 4);
r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->umsch_mm.cmd_buf_obj,
&adev->umsch_mm.cmd_buf_gpu_addr,
(void **)&adev->umsch_mm.cmd_buf_ptr);
if (r) {
dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
return r;
}
mutex_init(&adev->umsch_mm.mutex_hidden);
umsch_mm_agdb_index_init(adev);
......@@ -760,6 +809,11 @@ static int umsch_mm_sw_fini(void *handle)
amdgpu_ring_fini(&adev->umsch_mm.ring);
mutex_destroy(&adev->umsch_mm.mutex_hidden);
amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
&adev->umsch_mm.cmd_buf_gpu_addr,
(void **)&adev->umsch_mm.cmd_buf_ptr);
amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
return 0;
......
......@@ -147,6 +147,10 @@ struct amdgpu_umsch_mm {
uint64_t data_start_addr;
uint32_t data_size;
struct amdgpu_bo *cmd_buf_obj;
uint64_t cmd_buf_gpu_addr;
uint32_t *cmd_buf_ptr;
uint32_t wb_index;
uint64_t sch_ctx_gpu_addr;
uint32_t *sch_ctx_cpu_addr;
......@@ -163,12 +167,16 @@ struct amdgpu_umsch_mm {
struct mutex mutex_hidden;
};
int umsch_mm_psp_update_sram(struct amdgpu_device *adev, u32 ucode_size);
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
void* amdgpu_umsch_mm_add_cmd(struct amdgpu_umsch_mm *umsch,
void* cmd_ptr, uint32_t reg_offset, uint32_t reg_data);
int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
......
......@@ -34,9 +34,22 @@
#include "umsch_mm_4_0_api_def.h"
#include "umsch_mm_v4_0.h"
/*
 * WREG32_SOC15_UMSCH - write a VCN-block register directly or queue the write
 * @ptr:   current position in the command buffer (evaluated exactly once)
 * @reg:   register name; reg##_BASE_IDX selects the base in
 *         adev->reg_offset[VCN_HWIP][0][]
 * @value: value to write
 *
 * Requires a variable named 'adev' in the calling scope.
 *
 * When adev->firmware.load_type is AMDGPU_FW_LOAD_PSP the write is appended
 * at @ptr via amdgpu_umsch_mm_add_cmd() and the macro evaluates to the
 * pointer that call returns.  Otherwise the register is written with WREG32()
 * and the macro evaluates to @ptr unchanged.
 *
 * Locals carry a _umsch_ prefix so that an argument expression naming a
 * caller variable cannot be captured by the macro's own temporaries.
 */
#define WREG32_SOC15_UMSCH(ptr, reg, value) \
	({ \
		void *_umsch_ret = (ptr); \
		uint32_t _umsch_off = \
			adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) \
			_umsch_ret = amdgpu_umsch_mm_add_cmd(&adev->umsch_mm, \
							     _umsch_ret, \
							     _umsch_off, (value)); \
		else \
			WREG32(_umsch_off, (value)); \
		_umsch_ret; \
	})
static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
{
struct amdgpu_device *adev = umsch->ring.adev;
void* ptr = umsch->cmd_buf_ptr;
uint32_t data;
int r;
......@@ -50,88 +63,95 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL);
data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0);
WREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regUMSCH_MES_RESET_CTRL, data);
data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1);
WREG32_SOC15(VCN, 0, regVCN_MES_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_CNTL, data);
data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL);
data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0);
data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0);
data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0);
WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_INTR_ROUTINE_START,
lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
WREG32_SOC15(VCN, 0, regVCN_MES_INTR_ROUTINE_START,
lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
WREG32_SOC15(VCN, 0, regVCN_MES_INTR_ROUTINE_START_HI,
upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_INTR_ROUTINE_START_HI,
upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
WREG32_SOC15(VCN, 0, regVCN_MES_PRGRM_CNTR_START,
lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
WREG32_SOC15(VCN, 0, regVCN_MES_PRGRM_CNTR_START_HI,
upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_PRGRM_CNTR_START,
lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_PRGRM_CNTR_START_HI,
upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1;
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_LO,
lower_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_HI,
upper_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_LO,
lower_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_BASE_HI,
upper_32_bits(adev->umsch_mm.ucode_fw_gpu_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_MIBOUND_LO, 0x1FFFFF);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_MIBOUND_LO, 0x1FFFFF);
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_BASE0_LO,
lower_32_bits(adev->umsch_mm.data_start_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_BASE0_HI,
upper_32_bits(adev->umsch_mm.data_start_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_BASE0_LO,
lower_32_bits(adev->umsch_mm.data_start_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_BASE0_HI,
upper_32_bits(adev->umsch_mm.data_start_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_MASK0_LO,
lower_32_bits(adev->umsch_mm.data_size - 1));
WREG32_SOC15(VCN, 0, regVCN_MES_LOCAL_MASK0_HI,
upper_32_bits(adev->umsch_mm.data_size - 1));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_MASK0_LO,
lower_32_bits(adev->umsch_mm.data_size - 1));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_LOCAL_MASK0_HI,
upper_32_bits(adev->umsch_mm.data_size - 1));
WREG32_SOC15(VCN, 0, regVCN_MES_DC_BASE_LO,
lower_32_bits(adev->umsch_mm.data_fw_gpu_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_DC_BASE_HI,
upper_32_bits(adev->umsch_mm.data_fw_gpu_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_DC_BASE_LO,
lower_32_bits(adev->umsch_mm.data_fw_gpu_addr));
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_DC_BASE_HI,
upper_32_bits(adev->umsch_mm.data_fw_gpu_addr));
WREG32_SOC15(VCN, 0, regVCN_MES_MDBOUND_LO, 0x3FFFF);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_MDBOUND_LO, 0x3FFFF);
data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE);
data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1);
data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1);
WREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE, data);
ptr = WREG32_SOC15_UMSCH(ptr, regUVD_UMSCH_FORCE, data);
data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
WREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_OP_CNTL, data);
data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
WREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_IC_OP_CNTL, data);
WREG32_SOC15(VCN, 0, regVCN_MES_GP0_LO, 0);
WREG32_SOC15(VCN, 0, regVCN_MES_GP0_HI, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP0_LO, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP0_HI, 0);
WREG32_SOC15(VCN, 0, regVCN_MES_GP1_LO, 0);
WREG32_SOC15(VCN, 0, regVCN_MES_GP1_HI, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP1_LO, 0);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_GP1_HI, 0);
data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0);
data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1);
WREG32_SOC15(VCN, 0, regVCN_MES_CNTL, data);
ptr = WREG32_SOC15_UMSCH(ptr, regVCN_MES_CNTL, data);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
umsch_mm_psp_update_sram(adev,
(u32)((uintptr_t)ptr - (uintptr_t)umsch->cmd_buf_ptr));
}
r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF);
if (r) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment