Commit 1d44ff3d authored by Jay Cornwall, committed by Alex Deucher

drm/amdkfd: Trap handler changes for GC 9.4.3 v2

v1:
Check new exception bits in TRAPSTS register
Remove single step exception workaround, now part of
exception bits

v2:
GC 9.4.3 uses ttmp11 to store {1'b0, dispatch index [24:0],
wave_id_in_workgroup[5:0]}, so use ttmp13 instead of ttmp11 to
preserve ib_sts. (Laurent)
Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Signed-off-by: Laurent Morichetti <Laurent.Morichetti@amd.com>
Reviewed-by: Laurent Morichetti <laurent.morichetti@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 16b34622
...@@ -33,15 +33,20 @@ ...@@ -33,15 +33,20 @@
* aldebaran: * aldebaran:
* cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3 * cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3
* sp3 aldebaran.sp3 -hex aldebaran.hex * sp3 aldebaran.sp3 -hex aldebaran.hex
*
* gc_9_4_3:
* cpp -DASIC_FAMILY=CHIP_GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
* sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
*/ */
#define CHIP_VEGAM 18 #define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23 #define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25 #define CHIP_ALDEBARAN 25
#define CHIP_GC_9_4_3 26
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
/**************************************************************************/ /**************************************************************************/
/* variables */ /* variables */
...@@ -77,6 +82,10 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 ...@@ -77,6 +82,10 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_HOST_TRAP_MASK = 0x400000
var SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK = 0x800000
var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x1000000
var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x2000000
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
...@@ -95,10 +104,10 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 ...@@ -95,10 +104,10 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23
var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 var TTMP_DEBUG_TRAP_ENABLED_MASK = 0x800000
/* Save */ /* Save */
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
...@@ -129,6 +138,11 @@ var s_save_alloc_size = s_save_trapsts //conflict ...@@ -129,6 +138,11 @@ var s_save_alloc_size = s_save_trapsts //conflict
var s_save_m0 = ttmp5 var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp //no conflict var s_save_ttmps_lo = s_save_tmp //no conflict
var s_save_ttmps_hi = s_save_trapsts //no conflict var s_save_ttmps_hi = s_save_trapsts //no conflict
#if ASIC_FAMILY >= CHIP_GC_9_4_3
var s_save_ib_sts = ttmp13
#else
var s_save_ib_sts = ttmp11
#endif
/* Restore */ /* Restore */
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
...@@ -215,9 +229,15 @@ L_NOT_HALTED: ...@@ -215,9 +229,15 @@ L_NOT_HALTED:
// Any concurrent SAVECTX will be handled upon re-entry once halted. // Any concurrent SAVECTX will be handled upon re-entry once halted.
// Check non-maskable exceptions. memory_violation, illegal_instruction // Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap // and debugger (host trap, wave start/end, trap after instruction)
// handler. // exceptions always cause the wave to enter the trap handler.
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK s_and_b32 ttmp2, s_save_trapsts, \
SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | \
SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK | \
SQ_WAVE_TRAPSTS_HOST_TRAP_MASK | \
SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK | \
SQ_WAVE_TRAPSTS_WAVE_END_MASK | \
SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP s_cbranch_scc1 L_FETCH_2ND_TRAP
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
...@@ -265,9 +285,9 @@ L_FETCH_2ND_TRAP: ...@@ -265,9 +285,9 @@ L_FETCH_2ND_TRAP:
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2 s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp2
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
...@@ -1058,17 +1078,17 @@ function set_status_without_spi_prio(status, tmp) ...@@ -1058,17 +1078,17 @@ function set_status_without_spi_prio(status, tmp)
end end
function save_and_clear_ib_sts(tmp) function save_and_clear_ib_sts(tmp)
// Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space s_save_ib_sts[31:26].
s_getreg_b32 tmp, hwreg(HW_REG_IB_STS) s_getreg_b32 tmp, hwreg(HW_REG_IB_STS)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) s_lshl_b32 tmp, tmp, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_SAVE_RCNT_FIRST_REPLAY_MASK
s_or_b32 ttmp11, ttmp11, tmp s_or_b32 s_save_ib_sts, s_save_ib_sts, tmp
s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0 s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
end end
function restore_ib_sts(tmp) function restore_ib_sts(tmp)
s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) s_lshr_b32 tmp, s_save_ib_sts, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp s_setreg_b32 hwreg(HW_REG_IB_STS), tmp
end end
...@@ -452,9 +452,9 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) ...@@ -452,9 +452,9 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) { } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex; kfd->cwsr_isa = cwsr_trap_gfx9_hex;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment