Commit a60d811b authored by Jay Cornwall's avatar Jay Cornwall Committed by Oded Gabbay

drm/amdkfd: Fix race between scheduler and context restore

The scheduler may raise SQ_WAVE_STATUS.SPI_PRIO via SQ_CMD before
context restore has completed. Restoring SPI_PRIO=0 after this point
may cause context save to fail as the lower priority wavefronts
are not selected for execution among spin-waiting wavefronts.

Leave SPI_PRIO at its SPI-initialized or scheduler-raised value.

v2: Also fix race with exception handler
Signed-off-by: default avatarJay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent 1cd106ec
...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 ...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
...@@ -251,7 +255,7 @@ if (!EMU_RUN_HACK) ...@@ -251,7 +255,7 @@ if (!EMU_RUN_HACK)
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
s_or_b32 ttmp7, ttmp8, ttmp9 s_or_b32 ttmp7, ttmp8, ttmp9
s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler
L_NO_NEXT_TRAP: L_NO_NEXT_TRAP:
...@@ -262,7 +266,7 @@ L_NO_NEXT_TRAP: ...@@ -262,7 +266,7 @@ L_NO_NEXT_TRAP:
s_addc_u32 ttmp1, ttmp1, 0 s_addc_u32 ttmp1, ttmp1, 0
L_EXCP_CASE: L_EXCP_CASE:
s_and_b32 ttmp1, ttmp1, 0xFFFF s_and_b32 ttmp1, ttmp1, 0xFFFF
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
s_rfe_b64 [ttmp0, ttmp1] s_rfe_b64 [ttmp0, ttmp1]
end end
// ********* End handling of non-CWSR traps ******************* // ********* End handling of non-CWSR traps *******************
...@@ -1053,7 +1057,7 @@ end ...@@ -1053,7 +1057,7 @@ end
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
...@@ -1134,3 +1138,11 @@ end ...@@ -1134,3 +1138,11 @@ end
function get_hwreg_size_bytes function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes return 128 //HWREG size 128 bytes
end end
function set_status_without_spi_prio(status, tmp)
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 ...@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000 var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
...@@ -317,7 +321,7 @@ L_EXCP_CASE: ...@@ -317,7 +321,7 @@ L_EXCP_CASE:
// Restore SQ_WAVE_STATUS. // Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status set_status_without_spi_prio(s_save_status, ttmp2)
s_rfe_b64 [ttmp0, ttmp1] s_rfe_b64 [ttmp0, ttmp1]
end end
...@@ -1120,7 +1124,7 @@ end ...@@ -1120,7 +1124,7 @@ end
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
...@@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround ...@@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround
s_waitcnt lgkmcnt(0) s_waitcnt lgkmcnt(0)
end end
end end
function set_status_without_spi_prio(status, tmp)
// Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment