Commit fc7f1d96 authored by Jonathan Kim, committed by Alex Deucher

drm/amdkfd: fix and enable ttmp setup for gfx11

The MES cached process context must be cleared on adding any queue for
the first time.

For proper debug support, the MES will clear its cached process context
on the first call to SET_SHADER_DEBUGGER.

This allows TTMPs to be persistently enabled in a safe manner.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Eric Huang <jinhuieric@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f9acfafc
...@@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev, ...@@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
{ {
uint32_t data = 0; uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
......
...@@ -2759,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug, ...@@ -2759,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
if (pdd->qpd.queue_count) if (pdd->qpd.queue_count)
return -EEXIST; return -EEXIST;
/*
* Setup TTMPs by default.
* Note that this call must remain here for MES ADD QUEUE to
* skip_process_ctx_clear unconditionally as the first call to
* SET_SHADER_DEBUGGER clears any stale process context data
* saved in MES.
*/
if (pdd->dev->kfd->shared_resources.enable_mes)
kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
} }
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
...@@ -2852,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p) ...@@ -2852,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p)
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm); debug_refresh_runlist(pdd->dev->dqm);
else else
kfd_dbg_set_mes_debug_mode(pdd); kfd_dbg_set_mes_debug_mode(pdd,
!kfd_dbg_has_cwsr_workaround(pdd->dev));
} }
} }
......
...@@ -344,11 +344,10 @@ static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable) ...@@ -344,11 +344,10 @@ static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
return r; return r;
} }
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{ {
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags; uint32_t flags = pdd->process->dbg_flags;
bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0; return 0;
...@@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, ...@@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm); r = debug_map_and_unlock(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
kfd_dbg_clear_dev_watch_id(pdd, watch_id); kfd_dbg_clear_dev_watch_id(pdd, watch_id);
...@@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, ...@@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm); r = debug_map_and_unlock(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
/* HWS is broken so no point in HW rollback but release the watchpoint anyways */ /* HWS is broken so no point in HW rollback but release the watchpoint anyways */
if (r) if (r)
...@@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) ...@@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm); r = debug_refresh_runlist(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) { if (r) {
target->dbg_flags = prev_flags; target->dbg_flags = prev_flags;
...@@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) ...@@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm); debug_refresh_runlist(pdd->dev->dqm);
else else
kfd_dbg_set_mes_debug_mode(pdd); kfd_dbg_set_mes_debug_mode(pdd, true);
} }
} }
...@@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind ...@@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm); debug_refresh_runlist(pdd->dev->dqm);
else else
kfd_dbg_set_mes_debug_mode(pdd); kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
} }
kfd_dbg_set_workaround(target, false); kfd_dbg_set_workaround(target, false);
...@@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target) ...@@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm); r = debug_refresh_runlist(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) { if (r) {
target->runtime_info.runtime_state = target->runtime_info.runtime_state =
...@@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target, ...@@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm); r = debug_refresh_runlist(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) if (r)
break; break;
...@@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, ...@@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes) if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm); r = debug_refresh_runlist(pdd->dev->dqm);
else else
r = kfd_dbg_set_mes_debug_mode(pdd); r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) if (r)
break; break;
......
...@@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev) ...@@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
return true; return true;
} }
int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd); int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
/*
 * kfd_dbg_has_ttmps_always_setup - version-based predicate for @dev.
 *
 * Returns true when either of the following holds:
 *   - the GC IP version is below 11.0.0 and is not 9.4.2, or
 *   - the GC IP version is in [11.0.0, 12.0.0) and the MES scheduler
 *     version (sched_version masked with AMDGPU_MES_VERSION_MASK) is at
 *     least 70.
 * Returns false in every other case, including GC 12.0.0 and later.
 *
 * NOTE(review): going by the name and the commit description, a true result
 * presumably means TTMP registers are set up for the process regardless of
 * debugger attach state - confirm against the callers.
 */
static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
{
	/* Pre-gfx11 parts: only GC 9.4.2 is excluded. */
	if (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0))
		return KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2);

	/* GC 12.0.0 and later never qualify. */
	if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
		return false;

	/*
	 * GC 11.x: the result depends on the MES scheduler version, which is
	 * only read on this path.
	 */
	return (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70;
}
#endif #endif
...@@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, ...@@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.tma_addr = qpd->tma_addr; queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
kfd_dbg_has_ttmps_always_setup(q->device);
queue_type = convert_to_mes_queue_type(q->properties.type); queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) { if (queue_type < 0) {
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
#include "kfd_iommu.h" #include "kfd_iommu.h"
#include "kfd_svm.h" #include "kfd_svm.h"
#include "kfd_debug.h"
#include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu.h" #include "amdgpu.h"
...@@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) ...@@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
dev->node_props.debug_prop |= dev->node_props.debug_prop |=
...@@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) ...@@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT; HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
dev->node_props.debug_prop |=
HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |= dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
...@@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) ...@@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT; HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0)) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
else
dev->node_props.capability |= dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment