Commit 9370e430 authored by Dave Airlie

Merge tag 'amd-drm-fixes-6.11-2024-08-14' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.11-2024-08-14:

amdgpu:
- Fix MES ring buffer overflow
- DCN 3.5 fix
- DCN 3.2.1 fix
- DP MST fix
- Cursor fixes
- JPEG fixes
- Context ops validation
- MES 12 fixes
- VCN 5.0 fix
- HDP fix
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240814213846.1331827-1-alexander.deucher@amd.com
parents 7c626ce4 23acd1f3
@@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
        r = amdgpu_ring_parse_cs(ring, p, job, ib);
        if (r)
            return r;
+
+        if (ib->sa_bo)
+            ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
    } else {
        ib->ptr = (uint32_t *)kptr;
        r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
...
@@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
    switch (args->in.op) {
    case AMDGPU_CTX_OP_ALLOC_CTX:
+        if (args->in.flags)
+            return -EINVAL;
        r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
        args->out.alloc.ctx_id = id;
        break;
    case AMDGPU_CTX_OP_FREE_CTX:
+        if (args->in.flags)
+            return -EINVAL;
        r = amdgpu_ctx_free(fpriv, id);
        break;
    case AMDGPU_CTX_OP_QUERY_STATE:
+        if (args->in.flags)
+            return -EINVAL;
        r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
        break;
    case AMDGPU_CTX_OP_QUERY_STATE2:
+        if (args->in.flags)
+            return -EINVAL;
        r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
        break;
    case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
...
@@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
    int i, r = 0;
    int j;

+    if (adev->enable_mes) {
+        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+            j = i + xcc_id * adev->gfx.num_compute_rings;
+            amdgpu_mes_unmap_legacy_queue(adev,
+                              &adev->gfx.compute_ring[j],
+                              RESET_QUEUES, 0, 0);
+        }
+        return 0;
+    }
+
    if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
        return -EINVAL;

@@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
    int i, r = 0;
    int j;

+    if (adev->enable_mes) {
+        if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+            for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+                j = i + xcc_id * adev->gfx.num_gfx_rings;
+                amdgpu_mes_unmap_legacy_queue(adev,
+                                  &adev->gfx.gfx_ring[j],
+                                  PREEMPT_QUEUES, 0, 0);
+            }
+        }
+        return 0;
+    }
+
    if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
        return -EINVAL;

@@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
    if (amdgpu_device_skip_hw_access(adev))
        return 0;

-    if (adev->mes.ring.sched.ready)
+    if (adev->mes.ring[0].sched.ready)
        return amdgpu_mes_rreg(adev, reg);

    BUG_ON(!ring->funcs->emit_rreg);

@@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
    if (amdgpu_device_skip_hw_access(adev))
        return;

-    if (adev->mes.ring.sched.ready) {
+    if (adev->mes.ring[0].sched.ready) {
        amdgpu_mes_wreg(adev, reg, v);
        return;
    }
...
@@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
        ring = adev->rings[i];
        vmhub = ring->vm_hub;

-        if (ring == &adev->mes.ring ||
+        if (ring == &adev->mes.ring[0] ||
+            ring == &adev->mes.ring[1] ||
            ring == &adev->umsch_mm.ring)
            continue;

@@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
    unsigned long flags;
    uint32_t seq;

-    if (adev->mes.ring.sched.ready) {
+    if (adev->mes.ring[0].sched.ready) {
        amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
                          ref, mask);
        return;
...
@@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
    idr_init(&adev->mes.queue_id_idr);
    ida_init(&adev->mes.doorbell_ida);
    spin_lock_init(&adev->mes.queue_id_lock);
-    spin_lock_init(&adev->mes.ring_lock);
    mutex_init(&adev->mes.mutex_hidden);

+    for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+        spin_lock_init(&adev->mes.ring_lock[i]);
+
    adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
    adev->mes.vmid_mask_mmhub = 0xffffff00;
    adev->mes.vmid_mask_gfxhub = 0xffffff00;
@@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
        adev->mes.sdma_hqd_mask[i] = 0xfc;
    }

-    r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
-    if (r) {
-        dev_err(adev->dev,
-            "(%d) ring trail_fence_offs wb alloc failed\n", r);
-        goto error_ids;
-    }
-    adev->mes.sch_ctx_gpu_addr =
-        adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
-    adev->mes.sch_ctx_ptr =
-        (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
-
-    r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
-    if (r) {
-        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
-        dev_err(adev->dev,
-            "(%d) query_status_fence_offs wb alloc failed\n", r);
-        goto error_ids;
-    }
-    adev->mes.query_status_fence_gpu_addr =
-        adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
-    adev->mes.query_status_fence_ptr =
-        (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
+    for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+        r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+        if (r) {
+            dev_err(adev->dev,
+                "(%d) ring trail_fence_offs wb alloc failed\n",
+                r);
+            goto error;
+        }
+        adev->mes.sch_ctx_gpu_addr[i] =
+            adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+        adev->mes.sch_ctx_ptr[i] =
+            (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
+
+        r = amdgpu_device_wb_get(adev,
+                     &adev->mes.query_status_fence_offs[i]);
+        if (r) {
+            dev_err(adev->dev,
+                "(%d) query_status_fence_offs wb alloc failed\n",
+                r);
+            goto error;
+        }
+        adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+            (adev->mes.query_status_fence_offs[i] * 4);
+        adev->mes.query_status_fence_ptr[i] =
+            (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
+    }

    r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
    if (r) {
-        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
-        amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
        dev_err(adev->dev,
            "(%d) read_val_offs alloc failed\n", r);
-        goto error_ids;
+        goto error;
    }
    adev->mes.read_val_gpu_addr =
        adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
@@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error_doorbell:
    amdgpu_mes_doorbell_free(adev);
error:
-    amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
-    amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-    amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
-error_ids:
+    for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+        if (adev->mes.sch_ctx_ptr[i])
+            amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+        if (adev->mes.query_status_fence_ptr[i])
+            amdgpu_device_wb_free(adev,
+                          adev->mes.query_status_fence_offs[i]);
+    }
+    if (adev->mes.read_val_ptr)
+        amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+
    idr_destroy(&adev->mes.pasid_idr);
    idr_destroy(&adev->mes.gang_id_idr);
    idr_destroy(&adev->mes.queue_id_idr);
@@ -226,13 +236,22 @@ int amdgpu_mes_init(struct amdgpu_device *adev)

void amdgpu_mes_fini(struct amdgpu_device *adev)
{
+    int i;
+
    amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
                  &adev->mes.event_log_gpu_addr,
                  &adev->mes.event_log_cpu_addr);

-    amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
-    amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-    amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+    for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+        if (adev->mes.sch_ctx_ptr[i])
+            amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+        if (adev->mes.query_status_fence_ptr[i])
+            amdgpu_device_wb_free(adev,
+                          adev->mes.query_status_fence_offs[i]);
+    }
+    if (adev->mes.read_val_ptr)
+        amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+
    amdgpu_mes_doorbell_free(adev);

    idr_destroy(&adev->mes.pasid_idr);
@@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
    amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
                       sizeof(ucode_prefix));
-    if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) {
+    if (adev->enable_uni_mes) {
        snprintf(fw_name, sizeof(fw_name),
             "amdgpu/%s_uni_mes.bin", ucode_prefix);
    } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
...
@@ -82,8 +82,8 @@ struct amdgpu_mes {
    uint64_t default_process_quantum;
    uint64_t default_gang_quantum;

-    struct amdgpu_ring ring;
-    spinlock_t ring_lock;
+    struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+    spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];

    const struct firmware *fw[AMDGPU_MAX_MES_PIPES];

@@ -112,12 +112,12 @@ struct amdgpu_mes {
    uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
    uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
    uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
-    uint32_t sch_ctx_offs;
-    uint64_t sch_ctx_gpu_addr;
-    uint64_t *sch_ctx_ptr;
-    uint32_t query_status_fence_offs;
-    uint64_t query_status_fence_gpu_addr;
-    uint64_t *query_status_fence_ptr;
+    uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+    uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+    uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+    uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+    uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+    uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
    uint32_t read_val_offs;
    uint64_t read_val_gpu_addr;
    uint32_t *read_val_ptr;
...
@@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
     */
    if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
        sched_hw_submission = max(sched_hw_submission, 256);
+    if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+        sched_hw_submission = 8;
    else if (ring == &adev->sdma.instance[0].page)
        sched_hw_submission = 256;
...
@@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared {
    struct amdgpu_fw_shared_unified_queue_struct sq;
    uint8_t pad1[8];
    struct amdgpu_fw_shared_fw_logging fw_log;
+    uint8_t pad2[20];
    struct amdgpu_fw_shared_rb_setup rb_setup;
-    uint8_t pad2[4];
+    struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+    struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+    uint8_t pad3[9];
};

#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
...
@@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev)
        adev->gfx.is_poweron = false;
    }

-    adev->mes.ring.sched.ready = false;
+    adev->mes.ring[0].sched.ready = false;
}

bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
...
@@ -3546,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle)
    return r;
}

-static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev)
-{
-    struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-    struct amdgpu_ring *kiq_ring = &kiq->ring;
-    int i, r = 0;
-
-    if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
-        return -EINVAL;
-
-    if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-                    adev->gfx.num_gfx_rings))
-        return -ENOMEM;
-
-    for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-        kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
-                       PREEMPT_QUEUES, 0, 0);
-
-    if (adev->gfx.kiq[0].ring.sched.ready)
-        r = amdgpu_ring_test_helper(kiq_ring);
-
-    return r;
-}
-
static int gfx_v12_0_hw_fini(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-    int r;
    uint32_t tmp;

    amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);

@@ -3580,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle)
    if (!adev->no_hw_access) {
        if (amdgpu_async_gfx_ring) {
-            r = gfx_v12_0_kiq_disable_kgq(adev);
-            if (r)
+            if (amdgpu_gfx_disable_kgq(adev, 0))
                DRM_ERROR("KGQ disable failed\n");
        }
...
@@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
    /* This is necessary for SRIOV as well as for GFXOFF to function
     * properly under bare metal
     */
-    if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+    if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
        amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
                         1 << vmid, GET_INST(GC, 0));
...
@@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
    /* This is necessary for SRIOV as well as for GFXOFF to function
     * properly under bare metal
     */
-    if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+    if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
        const unsigned eng = 17;
...
@@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
    amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));
-    amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+    amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

    amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));
-    amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+    amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

    amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));
...
@@ -23,6 +23,7 @@

#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v4_0_3.h"

@@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
    amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));
-    amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+    if (ring->funcs->parse_cs)
+        amdgpu_ring_write(ring, 0);
+    else
+        amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

    amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));
-    amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+    amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

    amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
        0, 0, PACKETJ_TYPE0));

@@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
    .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
    .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
    .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+    .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
    .emit_frame_size =
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
    adev->jpeg.ras = &jpeg_v4_0_3_ras;
}
+
+/**
+ * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+                  struct amdgpu_job *job,
+                  struct amdgpu_ib *ib)
+{
+    uint32_t i, reg, res, cond, type;
+    struct amdgpu_device *adev = parser->adev;
+
+    for (i = 0; i < ib->length_dw ; i += 2) {
+        reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+        res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+        cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+        type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+        if (res) /* only support 0 at the moment */
+            return -EINVAL;
+
+        switch (type) {
+        case PACKETJ_TYPE0:
+            if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
+                dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+                return -EINVAL;
+            }
+            break;
+        case PACKETJ_TYPE3:
+            if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
+                dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+                return -EINVAL;
+            }
+            break;
+        case PACKETJ_TYPE6:
+            if (ib->ptr[i] == CP_PACKETJ_NOP)
+                continue;
+            dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+            return -EINVAL;
+        default:
+            dev_err(adev->dev, "Unknown packet type %d !\n", type);
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
@@ -46,6 +46,9 @@

#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000

+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;

void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,

@@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                    uint32_t val, uint32_t mask);
+int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+                  struct amdgpu_job *job,
+                  struct amdgpu_ib *ib);
#endif /* __JPEG_V4_0_3_H__ */
@@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
    .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
    .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
    .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
+    .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
    .emit_frame_size =
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
...
@@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    union MESAPI__QUERY_MES_STATUS mes_status_pkt;
    signed long timeout = 3000000; /* 3000 ms */
    struct amdgpu_device *adev = mes->adev;
-    struct amdgpu_ring *ring = &mes->ring;
+    struct amdgpu_ring *ring = &mes->ring[0];
    struct MES_API_STATUS *api_status;
    union MESAPI__MISC *x_pkt = pkt;
    const char *op_str, *misc_op_str;
    unsigned long flags;
    u64 status_gpu_addr;
-    u32 status_offset;
+    u32 seq, status_offset;
    u64 *status_ptr;
    signed long r;
    int ret;

@@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    status_ptr = (u64 *)&adev->wb.wb[status_offset];
    *status_ptr = 0;

-    spin_lock_irqsave(&mes->ring_lock, flags);
+    spin_lock_irqsave(&mes->ring_lock[0], flags);
    r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
    if (r)
        goto error_unlock_free;

+    seq = ++ring->fence_drv.sync_seq;
+    r = amdgpu_fence_wait_polling(ring,
+                      seq - ring->fence_drv.num_fences_mask,
+                      timeout);
+    if (r < 1)
+        goto error_undo;
+
    api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
    api_status->api_completion_fence_addr = status_gpu_addr;
    api_status->api_completion_fence_value = 1;

@@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
    mes_status_pkt.api_status.api_completion_fence_addr =
        ring->fence_drv.gpu_addr;
-    mes_status_pkt.api_status.api_completion_fence_value =
-        ++ring->fence_drv.sync_seq;
+    mes_status_pkt.api_status.api_completion_fence_value = seq;

    amdgpu_ring_write_multiple(ring, &mes_status_pkt,
                   sizeof(mes_status_pkt) / 4);

    amdgpu_ring_commit(ring);
-    spin_unlock_irqrestore(&mes->ring_lock, flags);
+    spin_unlock_irqrestore(&mes->ring_lock[0], flags);

    op_str = mes_v11_0_get_op_string(x_pkt);
    misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);

@@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
        dev_dbg(adev->dev, "MES msg=%d was emitted\n",
            x_pkt->header.opcode);

-    r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+    r = amdgpu_fence_wait_polling(ring, seq, timeout);
    if (r < 1 || !*status_ptr) {
        if (misc_op_str)

@@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    amdgpu_device_wb_free(adev, status_offset);
    return 0;

+error_undo:
+    dev_err(adev->dev, "MES ring buffer is full.\n");
+    amdgpu_ring_undo(ring);
+
error_unlock_free:
-    spin_unlock_irqrestore(&mes->ring_lock, flags);
+    spin_unlock_irqrestore(&mes->ring_lock[0], flags);

error_wb_free:
    amdgpu_device_wb_free(adev, status_offset);
@@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
    mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
    mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
    mes_set_hw_res_pkt.paging_vmid = 0;
-    mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+    mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
    mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
-        mes->query_status_fence_gpu_addr;
+        mes->query_status_fence_gpu_addr[0];

    for (i = 0; i < MAX_COMPUTE_PIPES; i++)
        mes_set_hw_res_pkt.compute_hqd_mask[i] =

@@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
        return r;
    }

-    kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+    kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);

    return amdgpu_ring_test_helper(kiq_ring);
}

@@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
    if (pipe == AMDGPU_MES_KIQ_PIPE)
        ring = &adev->gfx.kiq[0].ring;
    else if (pipe == AMDGPU_MES_SCHED_PIPE)
-        ring = &adev->mes.ring;
+        ring = &adev->mes.ring[0];
    else
        BUG();

@@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;

-    ring = &adev->mes.ring;
+    ring = &adev->mes.ring[0];

    ring->funcs = &mes_v11_0_ring_funcs;

@@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
    if (pipe == AMDGPU_MES_KIQ_PIPE)
        ring = &adev->gfx.kiq[0].ring;
    else if (pipe == AMDGPU_MES_SCHED_PIPE)
-        ring = &adev->mes.ring;
+        ring = &adev->mes.ring[0];
    else
        BUG();
@@ -1200,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle)
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    int pipe;

-    amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
-    amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
    for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
        kfree(adev->mes.mqd_backup[pipe]);

@@ -1216,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle)
                  &adev->gfx.kiq[0].ring.mqd_gpu_addr,
                  &adev->gfx.kiq[0].ring.mqd_ptr);

-    amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
-                  &adev->mes.ring.mqd_gpu_addr,
-                  &adev->mes.ring.mqd_ptr);
+    amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
+                  &adev->mes.ring[0].mqd_gpu_addr,
+                  &adev->mes.ring[0].mqd_ptr);

    amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
-    amdgpu_ring_fini(&adev->mes.ring);
+    amdgpu_ring_fini(&adev->mes.ring[0]);

    if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
        mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);

@@ -1332,9 +1339,9 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
{
-    if (adev->mes.ring.sched.ready) {
-        mes_v11_0_kiq_dequeue(&adev->mes.ring);
-        adev->mes.ring.sched.ready = false;
+    if (adev->mes.ring[0].sched.ready) {
+        mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
+        adev->mes.ring[0].sched.ready = false;
    }

    if (amdgpu_sriov_vf(adev)) {

@@ -1352,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle)
    int r;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-    if (adev->mes.ring.sched.ready)
+    if (adev->mes.ring[0].sched.ready)
        goto out;

    if (!adev->enable_mes_kiq) {

@@ -1397,7 +1404,7 @@ static int mes_v11_0_hw_init(void *handle)
     * with MES enabled.
     */
    adev->gfx.kiq[0].ring.sched.ready = false;
-    adev->mes.ring.sched.ready = true;
+    adev->mes.ring[0].sched.ready = true;

    return 0;
...
@@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
}

static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
-                            void *pkt, int size,
+                            int pipe, void *pkt, int size,
                            int api_status_off)
{
    union MESAPI__QUERY_MES_STATUS mes_status_pkt;
    signed long timeout = 3000000; /* 3000 ms */
    struct amdgpu_device *adev = mes->adev;
-    struct amdgpu_ring *ring = &mes->ring;
+    struct amdgpu_ring *ring = &mes->ring[pipe];
+    spinlock_t *ring_lock = &mes->ring_lock[pipe];
    struct MES_API_STATUS *api_status;
    union MESAPI__MISC *x_pkt = pkt;
    const char *op_str, *misc_op_str;
    unsigned long flags;
    u64 status_gpu_addr;
-    u32 status_offset;
+    u32 seq, status_offset;
    u64 *status_ptr;
    signed long r;
    int ret;

@@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    status_ptr = (u64 *)&adev->wb.wb[status_offset];
    *status_ptr = 0;

-    spin_lock_irqsave(&mes->ring_lock, flags);
+    spin_lock_irqsave(ring_lock, flags);
    r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
    if (r)
        goto error_unlock_free;

+    seq = ++ring->fence_drv.sync_seq;
+    r = amdgpu_fence_wait_polling(ring,
+                      seq - ring->fence_drv.num_fences_mask,
+                      timeout);
+    if (r < 1)
+        goto error_undo;
+
    api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
    api_status->api_completion_fence_addr = status_gpu_addr;
    api_status->api_completion_fence_value = 1;
@@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
    mes_status_pkt.api_status.api_completion_fence_addr =
        ring->fence_drv.gpu_addr;
-    mes_status_pkt.api_status.api_completion_fence_value =
-        ++ring->fence_drv.sync_seq;
+    mes_status_pkt.api_status.api_completion_fence_value = seq;

    amdgpu_ring_write_multiple(ring, &mes_status_pkt,
                   sizeof(mes_status_pkt) / 4);

    amdgpu_ring_commit(ring);
-    spin_unlock_irqrestore(&mes->ring_lock, flags);
+    spin_unlock_irqrestore(ring_lock, flags);

    op_str = mes_v12_0_get_op_string(x_pkt);
    misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);

    if (misc_op_str)
-        dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
-            misc_op_str);
+        dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
+            pipe, op_str, misc_op_str);
    else if (op_str)
-        dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+        dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
+            pipe, op_str);
    else
-        dev_dbg(adev->dev, "MES msg=%d was emitted\n",
-            x_pkt->header.opcode);
+        dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
+            pipe, x_pkt->header.opcode);

-    r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+    r = amdgpu_fence_wait_polling(ring, seq, timeout);
    if (r < 1 || !*status_ptr) {
        if (misc_op_str)
-            dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
-                op_str, misc_op_str);
+            dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
+                pipe, op_str, misc_op_str);
        else if (op_str)
-            dev_err(adev->dev, "MES failed to respond to msg=%s\n",
-                op_str);
+            dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
+                pipe, op_str);
        else
-            dev_err(adev->dev, "MES failed to respond to msg=%d\n",
-                x_pkt->header.opcode);
+            dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
+                pipe, x_pkt->header.opcode);

        while (halt_if_hws_hang)
            schedule();

@@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
    amdgpu_device_wb_free(adev, status_offset);
    return 0;

+error_undo:
+    dev_err(adev->dev, "MES ring buffer is full.\n");
+    amdgpu_ring_undo(ring);
+
error_unlock_free:
-    spin_unlock_irqrestore(&mes->ring_lock, flags);
+    spin_unlock_irqrestore(ring_lock, flags);

error_wb_free:
    amdgpu_device_wb_free(adev, status_offset);
@@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type)
        return MES_QUEUE_TYPE_COMPUTE;
    else if (queue_type == AMDGPU_RING_TYPE_SDMA)
        return MES_QUEUE_TYPE_SDMA;
+    else if (queue_type == AMDGPU_RING_TYPE_MES)
+        return MES_QUEUE_TYPE_SCHQ;
    else
        BUG();
    return -1;

@@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
    mes_add_queue_pkt.gds_size = input->queue_size;

    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+            AMDGPU_MES_SCHED_PIPE,
            &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
            offsetof(union MESAPI__ADD_QUEUE, api_status));
}

@@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
    mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+            AMDGPU_MES_SCHED_PIPE,
            &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
            offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
@@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
                      struct mes_map_legacy_queue_input *input)
{
    union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+    int pipe;

    memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

@@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
        convert_to_mes_queue_type(input->queue_type);
    mes_add_queue_pkt.map_legacy_kq = 1;

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    if (mes->adev->enable_uni_mes)
+        pipe = AMDGPU_MES_KIQ_PIPE;
+    else
+        pipe = AMDGPU_MES_SCHED_PIPE;
+
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
            offsetof(union MESAPI__ADD_QUEUE, api_status));
}

@@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
                    struct mes_unmap_legacy_queue_input *input)
{
    union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+    int pipe;

    memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

@@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
            convert_to_mes_queue_type(input->queue_type);
    }

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    if (mes->adev->enable_uni_mes)
+        pipe = AMDGPU_MES_KIQ_PIPE;
+    else
+        pipe = AMDGPU_MES_SCHED_PIPE;
+
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
            offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
@@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
    return 0;
}

-static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes)
+static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
{
    union MESAPI__QUERY_MES_STATUS mes_status_pkt;

@@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes)
    mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
    mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &mes_status_pkt, sizeof(mes_status_pkt),
            offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}

@@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
                 struct mes_misc_op_input *input)
{
    union MESAPI__MISC misc_pkt;
+    int pipe;

    memset(&misc_pkt, 0, sizeof(misc_pkt));

@@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
        return -EINVAL;
    }

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    if (mes->adev->enable_uni_mes)
+        pipe = AMDGPU_MES_KIQ_PIPE;
+    else
+        pipe = AMDGPU_MES_SCHED_PIPE;
+
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &misc_pkt, sizeof(misc_pkt),
            offsetof(union MESAPI__MISC, api_status));
}

-static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes)
+static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
{
    union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;

@@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes)
    mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
    mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
            offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}

-static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
+static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
{
    int i;
    struct amdgpu_device *adev = mes->adev;
@@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
    mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
    mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

-    mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
-    mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
-    mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
-    mes_set_hw_res_pkt.paging_vmid = 0;
-    mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
-    mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
-        mes->query_status_fence_gpu_addr;
-
-    for (i = 0; i < MAX_COMPUTE_PIPES; i++)
-        mes_set_hw_res_pkt.compute_hqd_mask[i] =
-            mes->compute_hqd_mask[i];
-
-    for (i = 0; i < MAX_GFX_PIPES; i++)
-        mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
-
-    for (i = 0; i < MAX_SDMA_PIPES; i++)
-        mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
-
-    for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
-        mes_set_hw_res_pkt.aggregated_doorbells[i] =
-            mes->aggregated_doorbells[i];
+    if (pipe == AMDGPU_MES_SCHED_PIPE) {
+        mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+        mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+        mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+        mes_set_hw_res_pkt.paging_vmid = 0;
+
+        for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+            mes_set_hw_res_pkt.compute_hqd_mask[i] =
+                mes->compute_hqd_mask[i];
+
+        for (i = 0; i < MAX_GFX_PIPES; i++)
+            mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+                mes->gfx_hqd_mask[i];
+
+        for (i = 0; i < MAX_SDMA_PIPES; i++)
+            mes_set_hw_res_pkt.sdma_hqd_mask[i] =
+                mes->sdma_hqd_mask[i];
+
+        for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+            mes_set_hw_res_pkt.aggregated_doorbells[i] =
+                mes->aggregated_doorbells[i];
+    }
+
+    mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
+        mes->sch_ctx_gpu_addr[pipe];
+    mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+        mes->query_status_fence_gpu_addr[pipe];

    for (i = 0; i < 5; i++) {
        mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];

@@ -556,7 +596,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
        mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
    }

-    return mes_v12_0_submit_pkt_and_poll_completion(mes,
+    return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
            &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
            offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
@@ -734,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
    if (enable) {
        data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
-        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
-                     (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
        WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

        mutex_lock(&adev->srbm_mutex);
        for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
-            if ((!adev->enable_mes_kiq || adev->enable_uni_mes) &&
-                pipe == AMDGPU_MES_KIQ_PIPE)
-                continue;
-
            soc21_grbm_select(adev, 3, pipe, 0, 0);

            ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;

@@ -757,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
        /* unhalt MES and activate pipe0 */
        data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
-        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
-                     (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
        WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

        if (amdgpu_emu_mode)

@@ -774,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
        data = REG_SET_FIELD(data, CP_MES_CNTL,
                     MES_INVALIDATE_ICACHE, 1);
        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
-        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
-                     (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
        data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
        WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
    }

@@ -790,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
    mutex_lock(&adev->srbm_mutex);

    for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
-        if ((!adev->enable_mes_kiq || adev->enable_uni_mes) &&
-            pipe == AMDGPU_MES_KIQ_PIPE)
-            continue;
-
        /* me=3, queue=0 */
        soc21_grbm_select(adev, 3, pipe, 0, 0);
@@ -1085,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
        return r;
    }

-    kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+    kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);

    r = amdgpu_ring_test_ring(kiq_ring);
    if (r) {

@@ -1101,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
    struct amdgpu_ring *ring;
    int r;

-    if (pipe == AMDGPU_MES_KIQ_PIPE)
+    if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
        ring = &adev->gfx.kiq[0].ring;
-    else if (pipe == AMDGPU_MES_SCHED_PIPE)
-        ring = &adev->mes.ring;
    else
-        BUG();
+        ring = &adev->mes.ring[pipe];

-    if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
+    if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
        (amdgpu_in_reset(adev) || adev->in_suspend)) {
        *(ring->wptr_cpu_addr) = 0;
        *(ring->rptr_cpu_addr) = 0;

@@ -1120,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
        return r;

    if (pipe == AMDGPU_MES_SCHED_PIPE) {
-        if (adev->enable_uni_mes) {
-            mes_v12_0_queue_init_register(ring);
-        } else {
+        if (adev->enable_uni_mes)
+            r = amdgpu_mes_map_legacy_queue(adev, ring);
+        else
            r = mes_v12_0_kiq_enable_queue(adev);
            if (r)
                return r;
-        }
    } else {
        mes_v12_0_queue_init_register(ring);
    }
@@ -1146,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
    return 0;
}

-static int mes_v12_0_ring_init(struct amdgpu_device *adev)
+static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
{
    struct amdgpu_ring *ring;

-    ring = &adev->mes.ring;
+    ring = &adev->mes.ring[pipe];

    ring->funcs = &mes_v12_0_ring_funcs;

    ring->me = 3;
-    ring->pipe = 0;
+    ring->pipe = pipe;
    ring->queue = 0;

    ring->ring_obj = NULL;
    ring->use_doorbell = true;
-    ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
-    ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+    ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
    ring->no_scheduler = true;
    sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

+    if (pipe == AMDGPU_MES_SCHED_PIPE)
+        ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+    else
+        ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+
    return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
                AMDGPU_RING_PRIO_DEFAULT, NULL);
}

@@ -1178,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
    ring = &adev->gfx.kiq[0].ring;

    ring->me = 3;
-    ring->pipe = adev->enable_uni_mes ? 0 : 1;
+    ring->pipe = 1;
    ring->queue = 0;

    ring->adev = NULL;
@@ -1200,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
    int r, mqd_size = sizeof(struct v12_compute_mqd);
    struct amdgpu_ring *ring;

-    if (pipe == AMDGPU_MES_KIQ_PIPE)
+    if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
        ring = &adev->gfx.kiq[0].ring;
-    else if (pipe == AMDGPU_MES_SCHED_PIPE)
-        ring = &adev->mes.ring;
    else
-        BUG();
+        ring = &adev->mes.ring[pipe];

    if (ring->mqd_obj)
        return 0;

@@ -1246,9 +1274,6 @@ static int mes_v12_0_sw_init(void *handle)
        return r;

    for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
-        if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
-            continue;
-
        r = mes_v12_0_allocate_eop_buf(adev, pipe);
        if (r)
            return r;

@@ -1256,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle)
        r = mes_v12_0_mqd_sw_init(adev, pipe);
        if (r)
            return r;
-    }

-    if (adev->enable_mes_kiq) {
+        if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
            r = mes_v12_0_kiq_ring_init(adev);
+        else
+            r = mes_v12_0_ring_init(adev, pipe);
        if (r)
            return r;
    }

-    r = mes_v12_0_ring_init(adev);
-    if (r)
-        return r;
-
    return 0;
}
...@@ -1276,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) ...@@ -1276,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe; int pipe;
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
kfree(adev->mes.mqd_backup[pipe]); kfree(adev->mes.mqd_backup[pipe]);
...@@ -1286,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) ...@@ -1286,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle)
&adev->mes.eop_gpu_addr[pipe], &adev->mes.eop_gpu_addr[pipe],
NULL); NULL);
amdgpu_ucode_release(&adev->mes.fw[pipe]); amdgpu_ucode_release(&adev->mes.fw[pipe]);
}
amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
&adev->gfx.kiq[0].ring.mqd_gpu_addr,
&adev->gfx.kiq[0].ring.mqd_ptr);
amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
&adev->mes.ring.mqd_gpu_addr, amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
&adev->mes.ring.mqd_ptr); &adev->mes.ring[pipe].mqd_gpu_addr,
&adev->mes.ring[pipe].mqd_ptr);
amdgpu_ring_fini(&adev->mes.ring[pipe]);
}
}
amdgpu_ring_fini(&adev->gfx.kiq[0].ring); if (!adev->enable_uni_mes) {
amdgpu_ring_fini(&adev->mes.ring); amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
&adev->gfx.kiq[0].ring.mqd_gpu_addr,
&adev->gfx.kiq[0].ring.mqd_ptr);
amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
}
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
...@@ -1341,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) ...@@ -1341,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
soc21_grbm_select(adev, 0, 0, 0, 0); soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
adev->mes.ring.sched.ready = false; adev->mes.ring[0].sched.ready = false;
} }
static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
...@@ -1362,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) ...@@ -1362,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
{ {
int r = 0; int r = 0;
mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
if (adev->enable_uni_mes) if (adev->enable_uni_mes)
return mes_v12_0_hw_init(adev); mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
else
mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
...@@ -1392,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) ...@@ -1392,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
if (r) if (r)
goto failure; goto failure;
if (adev->enable_uni_mes) {
r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
if (r)
goto failure;
mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
}
r = mes_v12_0_hw_init(adev); r = mes_v12_0_hw_init(adev);
if (r) if (r)
goto failure; goto failure;
...@@ -1405,9 +1435,15 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) ...@@ -1405,9 +1435,15 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
{ {
if (adev->mes.ring.sched.ready) { if (adev->mes.ring[0].sched.ready) {
mes_v12_0_kiq_dequeue_sched(adev); if (adev->enable_uni_mes)
adev->mes.ring.sched.ready = false; amdgpu_mes_unmap_legacy_queue(adev,
&adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
RESET_QUEUES, 0, 0);
else
mes_v12_0_kiq_dequeue_sched(adev);
adev->mes.ring[0].sched.ready = false;
} }
mes_v12_0_enable(adev, false); mes_v12_0_enable(adev, false);
...@@ -1420,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle) ...@@ -1420,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle)
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->mes.ring.sched.ready) if (adev->mes.ring[0].sched.ready)
goto out; goto out;
if (!adev->enable_mes_kiq || adev->enable_uni_mes) { if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
r = mes_v12_0_load_microcode(adev, r = mes_v12_0_load_microcode(adev,
AMDGPU_MES_SCHED_PIPE, true); AMDGPU_MES_SCHED_PIPE, true);
...@@ -1443,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle) ...@@ -1443,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle)
mes_v12_0_enable(adev, true); mes_v12_0_enable(adev, true);
} }
/* Enable the MES to handle doorbell ring on unmapped queue */
mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
if (r) if (r)
goto failure; goto failure;
r = mes_v12_0_set_hw_resources(&adev->mes); r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
if (r) if (r)
goto failure; goto failure;
if (adev->enable_uni_mes) if (adev->enable_uni_mes)
mes_v12_0_set_hw_resources_1(&adev->mes); mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
mes_v12_0_init_aggregated_doorbell(&adev->mes); mes_v12_0_init_aggregated_doorbell(&adev->mes);
/* Enable the MES to handle doorbell ring on unmapped queue */ r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
r = mes_v12_0_query_sched_status(&adev->mes);
if (r) { if (r) {
DRM_ERROR("MES is busy\n"); DRM_ERROR("MES is busy\n");
goto failure; goto failure;
...@@ -1472,7 +1508,7 @@ static int mes_v12_0_hw_init(void *handle) ...@@ -1472,7 +1508,7 @@ static int mes_v12_0_hw_init(void *handle)
* with MES enabled. * with MES enabled.
*/ */
adev->gfx.kiq[0].ring.sched.ready = false; adev->gfx.kiq[0].ring.sched.ready = false;
adev->mes.ring.sched.ready = true; adev->mes.ring[0].sched.ready = true;
return 0; return 0;
...@@ -1515,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle) ...@@ -1515,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r; int pipe, r;
if (adev->enable_uni_mes) {
r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE);
if (!r)
return 0;
adev->enable_uni_mes = false;
}
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
continue;
r = amdgpu_mes_init_microcode(adev, pipe); r = amdgpu_mes_init_microcode(adev, pipe);
if (r) if (r)
return r; return r;
......
...@@ -76,6 +76,12 @@ ...@@ -76,6 +76,12 @@
((cond & 0xF) << 24) | \ ((cond & 0xF) << 24) | \
((type & 0xF) << 28)) ((type & 0xF) << 28))
#define CP_PACKETJ_NOP 0x60000000
#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
/* Packet 3 types */ /* Packet 3 types */
#define PACKET3_NOP 0x10 #define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11 #define PACKET3_SET_BASE 0x11
......
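The new CP_PACKETJ_GET_* macros above give the JPEG command-stream parser a way to pull the register offset, resource, condition and type fields back out of a PACKETJ header word. A minimal standalone sketch (plain userspace C with the macros re-declared locally and an arbitrary sample header, not the kernel parser itself):

/* Decode the PACKETJ fields from a sample 32-bit header using the new
 * extractor macros; the header value is illustrative only. */
#include <stdio.h>
#include <stdint.h>

#define CP_PACKETJ_NOP          0x60000000
#define CP_PACKETJ_GET_REG(x)   ((x) & 0x3FFFF)
#define CP_PACKETJ_GET_RES(x)   (((x) >> 18) & 0x3F)
#define CP_PACKETJ_GET_COND(x)  (((x) >> 24) & 0xF)
#define CP_PACKETJ_GET_TYPE(x)  (((x) >> 28) & 0xF)

int main(void)
{
	uint32_t hdr = 0x01234567;	/* arbitrary example header word */

	printf("reg=0x%05x res=0x%02x cond=0x%x type=0x%x\n",
	       CP_PACKETJ_GET_REG(hdr), CP_PACKETJ_GET_RES(hdr),
	       CP_PACKETJ_GET_COND(hdr), CP_PACKETJ_GET_TYPE(hdr));
	return 0;
}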
...@@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) ...@@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_HDP_SD |
AMD_CG_SUPPORT_MC_LS; AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN | adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_JPEG |
...@@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) ...@@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_HDP_SD |
AMD_CG_SUPPORT_MC_LS; AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN | adev->pg_flags = AMD_PG_SUPPORT_VCN |
......
...@@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) ...@@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle)
hpd_rx_irq_work_suspend(dm); hpd_rx_irq_work_suspend(dm);
if (adev->dm.dc->caps.ips_support)
dc_allow_idle_optimizations(adev->dm.dc, true);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
......
...@@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { ...@@ -804,12 +804,25 @@ struct dsc_mst_fairness_params {
}; };
#if defined(CONFIG_DRM_AMD_DC_FP) #if defined(CONFIG_DRM_AMD_DC_FP)
static int kbps_to_peak_pbn(int kbps) static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link)
{
u8 link_coding_cap;
uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B;
link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link);
if (link_coding_cap == DP_128b_132b_ENCODING)
fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B;
return fec_overhead_multiplier_x1000;
}
static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000)
{ {
u64 peak_kbps = kbps; u64 peak_kbps = kbps;
peak_kbps *= 1006; peak_kbps *= 1006;
peak_kbps = div_u64(peak_kbps, 1000); peak_kbps *= fec_overhead_multiplier_x1000;
peak_kbps = div_u64(peak_kbps, 1000 * 1000);
return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
} }
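The reworked kbps_to_peak_pbn() above folds a per-encoding FEC overhead multiplier into the PBN conversion alongside the existing 0.6% margin (x1006/1000), so both x1000 scalings are divided back out before the final rounded divide. A self-contained sketch of the same arithmetic, with a plain rounding divide standing in for the kernel's DIV64_U64_ROUND_UP and an illustrative bitrate:

/* Userspace sketch of the updated PBN math: apply the 0.6% downspread margin
 * and the per-encoding FEC overhead (both in x1000 fixed point), drop the two
 * scalings, then convert kbps to PBN with a rounded-up divide. */
#include <stdio.h>
#include <stdint.h>

#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B	1031
#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B	1000

static uint64_t div_round_up_u64(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;	/* stand-in for DIV64_U64_ROUND_UP */
}

static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000)
{
	uint64_t peak_kbps = kbps;

	peak_kbps *= 1006;				/* 0.6% downspread margin */
	peak_kbps *= fec_overhead_multiplier_x1000;	/* FEC overhead */
	peak_kbps /= 1000 * 1000;			/* remove both x1000 scalings */
	return (int)div_round_up_u64(peak_kbps * 64, 54 * 8 * 1000);
}

int main(void)
{
	/* e.g. a ~533 Mbps stream on an 8b/10b link (values are illustrative) */
	printf("pbn = %d\n",
	       kbps_to_peak_pbn(533000, PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B));
	return 0;
}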
...@@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, ...@@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state,
int link_timeslots_used; int link_timeslots_used;
int fair_pbn_alloc; int fair_pbn_alloc;
int ret = 0; int ret = 0;
uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled) { if (vars[i + k].dsc_enabled) {
initial_slack[i] = initial_slack[i] =
kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn;
bpp_increased[i] = false; bpp_increased[i] = false;
remaining_to_increase += 1; remaining_to_increase += 1;
} else { } else {
...@@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, ...@@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
int next_index; int next_index;
int remaining_to_try = 0; int remaining_to_try = 0;
int ret; int ret;
uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled if (vars[i + k].dsc_enabled
...@@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, ...@@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
if (next_index == -1) if (next_index == -1)
break; break;
vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
ret = drm_dp_atomic_find_time_slots(state, ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr, params[next_index].port->mgr,
params[next_index].port, params[next_index].port,
...@@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, ...@@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
vars[next_index].dsc_enabled = false; vars[next_index].dsc_enabled = false;
vars[next_index].bpp_x16 = 0; vars[next_index].bpp_x16 = 0;
} else { } else {
vars[next_index].pbn = kbps_to_peak_pbn( vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
params[next_index].bw_range.max_kbps);
ret = drm_dp_atomic_find_time_slots(state, ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr, params[next_index].port->mgr,
params[next_index].port, params[next_index].port,
...@@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, ...@@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
int count = 0; int count = 0;
int i, k, ret; int i, k, ret;
bool debugfs_overwrite = false; bool debugfs_overwrite = false;
uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
memset(params, 0, sizeof(params)); memset(params, 0, sizeof(params));
...@@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, ...@@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
/* Try no compression */ /* Try no compression */
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
vars[i + k].aconnector = params[i].aconnector; vars[i + k].aconnector = params[i].aconnector;
vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = false; vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0; vars[i + k].bpp_x16 = 0;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
...@@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, ...@@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
/* Try max compression */ /* Try max compression */
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = true; vars[i + k].dsc_enabled = true;
vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
...@@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, ...@@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (ret < 0) if (ret < 0)
return ret; return ret;
} else { } else {
vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = false; vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0; vars[i + k].bpp_x16 = 0;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
......
...@@ -46,6 +46,9 @@ ...@@ -46,6 +46,9 @@
#define SYNAPTICS_CASCADED_HUB_ID 0x5A #define SYNAPTICS_CASCADED_HUB_ID 0x5A
#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
enum mst_msg_ready_type { enum mst_msg_ready_type {
NONE_MSG_RDY_EVENT = 0, NONE_MSG_RDY_EVENT = 0,
DOWN_REP_MSG_RDY_EVENT = 1, DOWN_REP_MSG_RDY_EVENT = 1,
......
...@@ -3589,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) ...@@ -3589,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x (int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width + <= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) { pipe_ctx->plane_state->src_rect.x) {
pos_cpy.x = temp_x + viewport_width; pos_cpy.x = 2 * viewport_width - temp_x;
} }
} }
} else { } else {
...@@ -3682,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) ...@@ -3682,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x (int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width + <= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) { pipe_ctx->plane_state->src_rect.x) {
pos_cpy.x = 2 * viewport_width - temp_x; pos_cpy.x = temp_x + viewport_width;
} }
} }
} else { } else {
......
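The two hunks above swap which expression each branch of dcn10_set_cursor_position() uses for the adjusted cursor X. Taken purely as arithmetic, one expression translates the position into the adjacent viewport while the other mirrors it about the seam between the two viewports (x = viewport_width); a tiny illustrative sketch with arbitrary values:

/* Contrast the two cursor-X expressions swapped by the hunks above. */
#include <stdio.h>

int main(void)
{
	int viewport_width = 1920;
	int temp_x = 100;	/* example cursor x within the first viewport */

	printf("translate: %d\n", temp_x + viewport_width);	   /* 2020 */
	printf("mirror:    %d\n", 2 * viewport_width - temp_x);   /* 3740 */
	return 0;
}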
...@@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( ...@@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1; dc->caps.color.mpc.ocsc = 1;
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
dc->config.dc_mode_clk_limit_support = true; dc->config.dc_mode_clk_limit_support = true;
dc->config.enable_windowed_mpo_odm = true; dc->config.enable_windowed_mpo_odm = true;
/* read VBIOS LTTPR caps */ /* read VBIOS LTTPR caps */
......
...@@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { ...@@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE {
MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_SDMA,
MES_QUEUE_TYPE_MAX, MES_QUEUE_TYPE_MAX,
MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX,
}; };
struct MES_API_STATUS { struct MES_API_STATUS {
...@@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { ...@@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t send_write_data : 1; uint32_t send_write_data : 1;
uint32_t os_tdr_timeout_override : 1; uint32_t os_tdr_timeout_override : 1;
uint32_t use_rs64mem_for_proc_gang_ctx : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1;
uint32_t halt_on_misaligned_access : 1;
uint32_t use_add_queue_unmap_flag_addr : 1;
uint32_t enable_mes_sch_stb_log : 1;
uint32_t limit_single_process : 1;
uint32_t unmapped_doorbell_handling: 2; uint32_t unmapped_doorbell_handling: 2;
uint32_t reserved : 15; uint32_t reserved : 11;
}; };
uint32_t uint32_all; uint32_t uint32_all;
}; };
......
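The four new single-bit flags in MESAPI_SET_HW_RESOURCES are carved out of the reserved field, which shrinks from 15 to 11 bits so the flag word still packs into a single uint32_t. A minimal sketch of that bit accounting (a stand-alone demo union, not the real API struct; earlier_flags stands in for the pre-existing bits not shown in the hunk):

/* 15 (earlier flags) + 4 new 1-bit flags + 2-bit unmapped_doorbell_handling
 * + 11 reserved = 32 bits, so the union stays exactly one 32-bit word. */
#include <assert.h>
#include <stdint.h>

union demo_hw_res_flags {
	struct {
		uint32_t earlier_flags : 15;	/* stand-in for bits above the hunk */
		uint32_t halt_on_misaligned_access : 1;
		uint32_t use_add_queue_unmap_flag_addr : 1;
		uint32_t enable_mes_sch_stb_log : 1;
		uint32_t limit_single_process : 1;
		uint32_t unmapped_doorbell_handling : 2;
		uint32_t reserved : 11;
	};
	uint32_t uint32_all;
};

int main(void)
{
	static_assert(sizeof(union demo_hw_res_flags) == sizeof(uint32_t),
		      "flag word must stay 32 bits wide");
	return 0;
}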