Commit 07e06189 authored by Dave Airlie

Merge tag 'amd-drm-fixes-6.10-2024-06-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.10-2024-06-19:

amdgpu:
- Fix display idle optimization race
- Fix GPUVM TLB flush locking scope
- IPS fix
- GFX 9.4.3 harvesting fix
- Runtime pm fix for shared buffers
- DCN 3.5.x fixes
- USB4 fix
- RISC-V clang fix
- Silence UBSAN warnings
- MES11 fix
- PSP 14.0.x fix
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240619223233.3116457-1-alexander.deucher@amd.com
parents a8080132 ed5a4484
@@ -41,8 +41,6 @@
 #include <linux/dma-buf.h>
 #include <linux/dma-fence-array.h>
 #include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
-#include "amdgpu_trace.h"
 
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 	struct drm_gem_object *obj = dmabuf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	int r;
 
 	if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
 		attach->peer2peer = false;
 
-	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
-	trace_amdgpu_runpm_reference_dumps(1, __func__);
-	if (r < 0)
-		goto out;
-
 	return 0;
-
-out:
-	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-	trace_amdgpu_runpm_reference_dumps(0, __func__);
-	return r;
-}
-
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
-				  struct dma_buf_attachment *attach)
-{
-	struct drm_gem_object *obj = dmabuf->priv;
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
-	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
-	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-	trace_amdgpu_runpm_reference_dumps(0, __func__);
 }
 
 /**
@@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_dma_buf_attach,
-	.detach = amdgpu_dma_buf_detach,
 	.pin = amdgpu_dma_buf_pin,
 	.unpin = amdgpu_dma_buf_unpin,
 	.map_dma_buf = amdgpu_dma_buf_map,
...
@@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
-	trace_amdgpu_runpm_reference_dumps(1, __func__);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 		struct dma_fence *old;
@@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-		trace_amdgpu_runpm_reference_dumps(0, __func__);
 	} while (last_seq != seq);
 
 	return true;
...
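Note: the two files above, together with the amdgpu_trace.h hunk below, revert the runtime-PM reference tracking on DMA-buf attach/detach (the "runtime pm fix for shared buffers" from the summary): attaching an exported buffer no longer pins a pm_runtime reference for the lifetime of the attachment, the now-empty detach callback is dropped from amdgpu_dmabuf_ops, and the trace_amdgpu_runpm_reference_dumps() debug hooks that went with it are removed everywhere.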
@@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
 	unsigned int ndw;
-	signed long r;
+	int r;
 	uint32_t seq;
 
-	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
-	    !down_read_trylock(&adev->reset_domain->sem)) {
+	/*
+	 * A GPU reset should flush all TLBs anyway, so no need to do
+	 * this while one is ongoing.
+	 */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return 0;
+
+	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
 		if (adev->gmc.flush_tlb_needs_extra_type_2)
 			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
 								 2, all_hub,
@@ -703,43 +708,40 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
 							 flush_type, all_hub,
 							 inst);
-		return 0;
-	}
+		r = 0;
+	} else {
+		/* 2 dwords flush + 8 dwords fence */
+		ndw = kiq->pmf->invalidate_tlbs_size + 8;
 
-	/* 2 dwords flush + 8 dwords fence */
-	ndw = kiq->pmf->invalidate_tlbs_size + 8;
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			ndw += kiq->pmf->invalidate_tlbs_size;
 
-	if (adev->gmc.flush_tlb_needs_extra_type_2)
-		ndw += kiq->pmf->invalidate_tlbs_size;
+		if (adev->gmc.flush_tlb_needs_extra_type_0)
+			ndw += kiq->pmf->invalidate_tlbs_size;
 
-	if (adev->gmc.flush_tlb_needs_extra_type_0)
-		ndw += kiq->pmf->invalidate_tlbs_size;
+		spin_lock(&adev->gfx.kiq[inst].ring_lock);
+		amdgpu_ring_alloc(ring, ndw);
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
 
-	spin_lock(&adev->gfx.kiq[inst].ring_lock);
-	amdgpu_ring_alloc(ring, ndw);
-	if (adev->gmc.flush_tlb_needs_extra_type_2)
-		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+		if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
 
-	if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
-		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+		if (r) {
+			amdgpu_ring_undo(ring);
+			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+			goto error_unlock_reset;
+		}
 
-	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
-	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-	if (r) {
-		amdgpu_ring_undo(ring);
+		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-		goto error_unlock_reset;
+		if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
+			dev_err(adev->dev, "timeout waiting for kiq fence\n");
+			r = -ETIME;
+		}
 	}
 
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-	r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-	if (r < 1) {
-		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-		r = -ETIME;
-		goto error_unlock_reset;
-	}
-	r = 0;
-
 error_unlock_reset:
 	up_read(&adev->reset_domain->sem);
...
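Note: the locking-scope fix above moves the reset-domain trylock in front of both paths, so the direct (non-KIQ) flush is now also covered by the lock, and there is a single unlock at error_unlock_reset. A minimal user-space sketch of the same pattern, using a pthread rwlock as a stand-in for the kernel's reset_domain rw_semaphore (names are hypothetical, not driver code):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t reset_domain_sem = PTHREAD_RWLOCK_INITIALIZER;

	/* A reset holds the lock as a writer and flushes all TLBs itself,
	 * so failing the trylock just means there is nothing to do. */
	static int flush_gpu_tlb(void)
	{
		if (pthread_rwlock_tryrdlock(&reset_domain_sem) != 0)
			return 0;

		printf("flushing TLB\n"); /* stand-in for the real flush */

		pthread_rwlock_unlock(&reset_domain_sem); /* single unlock point */
		return 0;
	}

	int main(void)
	{
		return flush_gpu_tlb();
	}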
@@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
 		      __entry->value)
 );
 
-TRACE_EVENT(amdgpu_runpm_reference_dumps,
-	    TP_PROTO(uint32_t index, const char *func),
-	    TP_ARGS(index, func),
-	    TP_STRUCT__entry(
-			     __field(uint32_t, index)
-			     __string(func, func)
-			     ),
-	    TP_fast_assign(
-			   __entry->index = index;
-			   __assign_str(func);
-			   ),
-	    TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
-		      __entry->index,
-		      __get_str(func))
-);
-
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif
...
@@ -4195,9 +4195,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i
 static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
 				  struct amdgpu_cu_info *cu_info)
 {
-	int i, j, k, counter, xcc_id, active_cu_number = 0;
-	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+	int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
+	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
 	unsigned disable_masks[4 * 4];
+	bool is_symmetric_cus;
 
 	if (!adev || !cu_info)
 		return -EINVAL;
@@ -4215,6 +4216,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+		is_symmetric_cus = true;
 		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 				mask = 1;
@@ -4242,6 +4244,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 				cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 			}
+			if (i && is_symmetric_cus && prev_counter != counter)
+				is_symmetric_cus = false;
+			prev_counter = counter;
+		}
+		if (is_symmetric_cus) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
+			tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
+			tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
 		}
 		gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
 					    xcc_id);
...
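Note (GFX 9.4.3 harvesting fix): prev_counter carries the active-CU count of the previous shader engine, so the symmetry test only starts from the second SE onward (the `i &&` guard). Only when every SE in an XCC reports the same count does the code set CPC_HARVESTING_RELAUNCH_DISABLE and CPC_HARVESTING_DISPATCH_DISABLE in CP_CPC_DEBUG; on an asymmetrically harvested part the register is left at its defaults.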
@@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 						    void *pkt, int size,
 						    int api_status_off)
 {
-	int ndw = size / 4;
-	signed long r;
-	union MESAPI__MISC *x_pkt = pkt;
-	struct MES_API_STATUS *api_status;
+	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	signed long timeout = 3000000; /* 3000 ms */
 	struct amdgpu_device *adev = mes->adev;
 	struct amdgpu_ring *ring = &mes->ring;
-	unsigned long flags;
-	signed long timeout = 3000000; /* 3000 ms */
+	struct MES_API_STATUS *api_status;
+	union MESAPI__MISC *x_pkt = pkt;
 	const char *op_str, *misc_op_str;
-	u32 fence_offset;
-	u64 fence_gpu_addr;
-	u64 *fence_ptr;
+	unsigned long flags;
+	u64 status_gpu_addr;
+	u32 status_offset;
+	u64 *status_ptr;
+	signed long r;
 	int ret;
 
 	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
@@ -177,28 +177,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
 		timeout = 15 * 600 * 1000;
 	}
-	BUG_ON(size % 4 != 0);
 
-	ret = amdgpu_device_wb_get(adev, &fence_offset);
+	ret = amdgpu_device_wb_get(adev, &status_offset);
 	if (ret)
 		return ret;
-	fence_gpu_addr =
-		adev->wb.gpu_addr + (fence_offset * 4);
-	fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
-	*fence_ptr = 0;
+
+	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+	status_ptr = (u64 *)&adev->wb.wb[status_offset];
+	*status_ptr = 0;
 
 	spin_lock_irqsave(&mes->ring_lock, flags);
-	if (amdgpu_ring_alloc(ring, ndw)) {
-		spin_unlock_irqrestore(&mes->ring_lock, flags);
-		amdgpu_device_wb_free(adev, fence_offset);
-		return -ENOMEM;
-	}
+	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+	if (r)
+		goto error_unlock_free;
 
 	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
-	api_status->api_completion_fence_addr = fence_gpu_addr;
+	api_status->api_completion_fence_addr = status_gpu_addr;
 	api_status->api_completion_fence_value = 1;
 
-	amdgpu_ring_write_multiple(ring, pkt, ndw);
+	amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	mes_status_pkt.api_status.api_completion_fence_addr =
+		ring->fence_drv.gpu_addr;
+	mes_status_pkt.api_status.api_completion_fence_value =
+		++ring->fence_drv.sync_seq;
+
+	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+				   sizeof(mes_status_pkt) / 4);
+
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&mes->ring_lock, flags);
@@ -206,15 +216,16 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
 
 	if (misc_op_str)
-		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+			misc_op_str);
 	else if (op_str)
 		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
 	else
-		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
+		dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+			x_pkt->header.opcode);
 
-	r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout);
-	amdgpu_device_wb_free(adev, fence_offset);
-	if (r < 1) {
+	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+	if (r < 1 || !*status_ptr) {
@@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 		while (halt_if_hws_hang)
 			schedule();
 
-		return -ETIMEDOUT;
+		r = -ETIMEDOUT;
+		goto error_wb_free;
 	}
 
+	amdgpu_device_wb_free(adev, status_offset);
 	return 0;
+
+error_unlock_free:
+	spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+error_wb_free:
+	amdgpu_device_wb_free(adev, status_offset);
+	return r;
 }
 
 static int convert_to_mes_queue_type(int queue_type)
...
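Note (MES11 fix): completion detection changes here. The caller's packet still writes its API status to a dedicated writeback slot (*status_ptr), but the function now also queues a MES_SCH_API_QUERY_SCHEDULER_STATUS packet behind it, fenced with the ring's own sync_seq, and waits on that ring fence. Timing out on the status query, or seeing the query complete while *status_ptr is still zero (`r < 1 || !*status_ptr`), both count as failure, and the consolidated error_unlock_free/error_wb_free labels keep the ring lock and the writeback slot balanced on every exit path.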
@@ -32,7 +32,9 @@
 #include "mp/mp_14_0_2_sh_mask.h"
 
 MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
 
 /* For large FW files the time to complete can be very long */
 #define USBC_PD_POLLING_LIMIT_S 240
@@ -64,6 +66,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
 	case IP_VERSION(14, 0, 2):
 	case IP_VERSION(14, 0, 3):
 		err = psp_init_sos_microcode(psp, ucode_prefix);
+		if (err)
+			return err;
+		err = psp_init_ta_microcode(psp, ucode_prefix);
 		if (err)
 			return err;
 		break;
...
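Note (PSP 14.0.x fix): PSP 14.0.2/14.0.3 now declare and load the TA (trusted applications) firmware alongside the SOS firmware; the added early return ensures a SOS load failure is reported before psp_init_ta_microcode() is attempted.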
@@ -8,7 +8,7 @@ config DRM_AMD_DC
 	depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
 	select SND_HDA_COMPONENT if SND_HDA_CORE
 	# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
-	select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
+	select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV))
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
...
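Note (RISC-V clang fix): the old condition `!ARM64 || !CC_IS_CLANG` only excluded the clang-on-arm64 combination, so a clang RISC-V build could still select DRM_AMD_DC_FP. By De Morgan, the new `!(CC_IS_CLANG && (ARM64 || RISCV))` is equivalent to `!CC_IS_CLANG || (!ARM64 && !RISCV)`: FP support is selected unless the compiler is clang targeting either arm64 or RISC-V.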
@@ -9169,9 +9169,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 	trace_amdgpu_dm_atomic_commit_tail_begin(state);
 
-	if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed)
-		dc_allow_idle_optimizations(dm->dc, false);
-
 	drm_atomic_helper_update_legacy_modeset_state(dev, state);
 	drm_dp_mst_atomic_wait_for_dependencies(state);
 
@@ -11440,6 +11437,12 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev)
 	mutex_unlock(&adev->dm.dc_lock);
 }
 
+static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc)
+{
+	if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter)
+		dc_exit_ips_for_hw_access(dc);
+}
+
 void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
 		       u32 value, const char *func_name)
 {
@@ -11450,6 +11453,8 @@ void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
 		return;
 	}
 #endif
+	amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
+
 	cgs_write_register(ctx->cgs_device, address, value);
 	trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value);
 }
@@ -11473,6 +11478,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
 		return 0;
 	}
 #endif
+	amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
+
 	value = cgs_read_register(ctx->cgs_device, address);
 	trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
...
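Note: this covers both the IPS fix and the display idle-optimization race fix from the summary. Instead of force-disabling idle optimizations at the top of amdgpu_dm_atomic_commit_tail(), which could race with them being re-enabled elsewhere, every DM register accessor now calls amdgpu_dm_exit_ips_for_hw_access() first; the helper asks DMUB to leave IPS only when idle_exit_counter is zero, i.e. when no exit is already in effect, so the hardware is guaranteed awake before any register read or write.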
@@ -177,7 +177,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 	.urgent_latency_pixel_data_only_us = 4.0,
 	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
 	.urgent_latency_vm_data_only_us = 4.0,
-	.dram_clock_change_latency_us = 11.72,
+	.dram_clock_change_latency_us = 34.0,
 	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
 	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
 	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
...
@@ -215,7 +215,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
 	.urgent_latency_pixel_data_only_us = 4.0,
 	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
 	.urgent_latency_vm_data_only_us = 4.0,
-	.dram_clock_change_latency_us = 11.72,
+	.dram_clock_change_latency_us = 34,
 	.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
 	.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
 	.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
...
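Note (DCN 3.5.x fixes): both the DCN 3.5 and DCN 3.5.1 SoC bounding boxes raise dram_clock_change_latency_us from 11.72 to 34 us, making DML budget more time for DRAM clock (P-state) switches. The 11.72 us figure was evidently too optimistic for these parts; underflow during mclk switching is the usual motivation for such a bump, though the commit body is not shown in this view.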
@@ -1439,3 +1439,75 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
 		}
 	}
 }
+
+static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx)
+{
+	/* Calculate average pixel count per TU, return false if under ~2.00 to
+	 * avoid empty TUs. This is only required for DPIA tunneling as empty TUs
+	 * are legal to generate for native DP links. Assume TU size 64 as there
+	 * is currently no scenario where it's reprogrammed from HW default.
+	 * MTPs have no such limitation, so this does not affect MST use cases.
+	 */
+	unsigned int pix_clk_mhz;
+	unsigned int symclk_mhz;
+	unsigned int avg_pix_per_tu_x1000;
+	unsigned int tu_size_bytes = 64;
+	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+	struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings;
+	const struct dc *dc = pipe_ctx->stream->link->dc;
+
+	if (pipe_ctx->stream->link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+		return false;
+
+	// Not necessary for MST configurations
+	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+		return false;
+
+	pix_clk_mhz = timing->pix_clk_100hz / 10000;
+
+	// If this is true, can't block due to dynamic ODM
+	if (pix_clk_mhz > dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz)
+		return false;
+
+	switch (link_settings->link_rate) {
+	case LINK_RATE_LOW:
+		symclk_mhz = 162;
+		break;
+	case LINK_RATE_HIGH:
+		symclk_mhz = 270;
+		break;
+	case LINK_RATE_HIGH2:
+		symclk_mhz = 540;
+		break;
+	case LINK_RATE_HIGH3:
+		symclk_mhz = 810;
+		break;
+	default:
+		// We shouldn't be tunneling any other rates, something is wrong
+		ASSERT(0);
+		return false;
+	}
+
+	avg_pix_per_tu_x1000 = (1000 * pix_clk_mhz * tu_size_bytes)
+		/ (symclk_mhz * link_settings->lane_count);
+
+	// Add small empirically-decided margin to account for potential jitter
+	return (avg_pix_per_tu_x1000 < 2020);
+}
+
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+	if (!is_h_timing_divisible_by_2(pipe_ctx->stream))
+		return false;
+
+	if (should_avoid_empty_tu(pipe_ctx))
+		return false;
+
+	if (dc_is_dp_signal(pipe_ctx->stream->signal) && !dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) &&
+	    dc->debug.enable_dp_dig_pixel_rate_div_policy)
+		return true;
+
+	return false;
+}
...
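Note (USB4 fix): a quick numerical check of the TU math above, using illustrative values rather than anything taken from the patch: a 25 MHz pixel clock tunneled over LINK_RATE_HIGH2 (540 MHz symbol clock) on 4 lanes averages well under two pixels per 64-byte TU, so should_avoid_empty_tu() returns true and the DIG pixel-rate divider policy is skipped. A self-contained sketch of the computation:

	#include <stdio.h>

	int main(void)
	{
		unsigned int pix_clk_mhz = 25;   /* e.g. 640x480@60 is ~25.2 MHz */
		unsigned int symclk_mhz = 540;   /* LINK_RATE_HIGH2 (HBR2) */
		unsigned int lane_count = 4;
		unsigned int tu_size_bytes = 64;

		/* same fixed-point form as the driver: pixels per TU, x1000 */
		unsigned int avg_pix_per_tu_x1000 =
			(1000 * pix_clk_mhz * tu_size_bytes) /
			(symclk_mhz * lane_count);

		/* 1000 * 25 * 64 / (540 * 4) = 740, i.e. ~0.74 pixels per TU,
		 * under the 2020 (~2.02) threshold, so empty TUs would occur */
		printf("avg_pix_per_tu_x1000 = %u\n", avg_pix_per_tu_x1000);
		return 0;
	}

The new policy hook is then declared in the header and wired into the DCN 3.5 function table below.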
@@ -95,4 +95,6 @@ void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx,
 void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
 		int num_pipes, uint32_t v_total_min, uint32_t v_total_max);
 
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
 #endif /* __DC_HWSS_DCN35_H__ */
...
@@ -158,7 +158,7 @@ static const struct hwseq_private_funcs dcn35_private_funcs = {
 	.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
 	.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
 	.set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
-	.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+	.is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
 	.dsc_pg_control = dcn35_dsc_pg_control,
 	.dsc_pg_status = dcn32_dsc_pg_status,
 	.enable_plane = dcn35_enable_plane,
...
@@ -164,6 +164,8 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev,
 
 	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) {
 		if (table[i].ulSupportedSCLK != 0) {
+			if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES)
+				continue;
 			vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit =
 				table[i].usVoltageID;
 			vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit =
...
@@ -1619,6 +1619,8 @@ void sumo_construct_vid_mapping_table(struct radeon_device *rdev,
 
 	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) {
 		if (table[i].ulSupportedSCLK != 0) {
+			if (table[i].usVoltageIndex >= SUMO_MAX_NUMBER_VOLTAGES)
+				continue;
 			vid_mapping_table->entries[table[i].usVoltageIndex].vid_7bit =
 				table[i].usVoltageID;
 			vid_mapping_table->entries[table[i].usVoltageIndex].vid_2bit =
...
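Note: the same guard lands in both the amdgpu (kv_dpm.c) and radeon (sumo_dpm.c) copies of sumo_construct_vid_mapping_table(). A BIOS power-level entry whose usVoltageIndex is at or beyond SUMO_MAX_NUMBER_VOLTAGES would otherwise index past the end of the fixed-size entries[] array; skipping such entries is most likely what silences the UBSAN warnings called out in the merge summary.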