Commit 450abfe4 authored by Lancelot SIX's avatar Lancelot SIX Committed by Alex Deucher

drm/amdkfd: save and restore barrier state for gfx12

Add support to save and restore the work group barrier state in gfx12
CWSR trap handler.

There is no support to directly restore the signal count of a barrier
state, so instead this patch repeatedly calls s_barrier_signal to
increment the signal count to the desired value.

In this patch, I have implemented the logic to restore the barrier at
the end of the block restoring the HWREGs.  This process needs to be
done by exactly 1 wave per work group.  To achieve this, the initial
value of s_restore_spi_init_hi (containing a FIRST_WAVE bit) needs to be
saved up until that point.  An alternative could be restore the barrier
earlier in the process (around when LDS is restored, as the same wave
does both).  Doing this would break the pattern that the restore
procedure follows the CWSR area layout.

Before restoring the barrier, this patch checks if the barrier was whose
state was saved has the "valid" bit set, even if I don't think this
barrier can be in an invalid state during context save.  I expect this
test to always be true.
Signed-off-by: default avatarLancelot SIX <lancelot.six@amd.com>
Reviewed-by: default avatarJay Cornwall <jay.cornwall@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent f2810033
......@@ -3647,7 +3647,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
};
static const uint32_t cwsr_trap_gfx12_hex[] = {
0xbfa00001, 0xbfa0023b,
0xbfa00001, 0xbfa00240,
0xb0804009, 0xb8f8f804,
0x9178ff78, 0x00008c00,
0xb8fbf811, 0x8b6eff78,
......@@ -3781,21 +3781,57 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0x0000fa71, 0x807d817d,
0xb8faf802, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xbefe00ff, 0x0000ffff,
0xbeff0080, 0xc4068070,
0xbefa50c1, 0xbfc70000,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xbefe00ff,
0x0000ffff, 0xbeff0080,
0xc4068070, 0x008ce802,
0x00000000, 0xbefe00c1,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
0x84708a70, 0xb8fa1e06,
0x847a8a7a, 0x80707a70,
0xbef600ff, 0x01000000,
0xbef90080, 0xbefd0080,
0xbf800000, 0xbe804100,
0xbe824102, 0xbe844104,
0xbe864106, 0xbe884108,
0xbe8a410a, 0xbe8c410c,
0xbe8e410e, 0xd7610002,
0x0000f200, 0x80798179,
0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
0x0000f202, 0x80798179,
0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
0x0000f204, 0x80798179,
0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
0x0000f206, 0x80798179,
0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
0x0000f208, 0x80798179,
0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
0x0000f20a, 0x80798179,
0xd7610002, 0x0000f20b,
0x80798179, 0xd7610002,
0x0000f20c, 0x80798179,
0xd7610002, 0x0000f20d,
0x80798179, 0xd7610002,
0x0000f20e, 0x80798179,
0xd7610002, 0x0000f20f,
0x80798179, 0xbf06a079,
0xbfa10007, 0xc4068070,
0x008ce802, 0x00000000,
0xbefe00c1, 0xb8f03b05,
0x80708170, 0xbf0d9973,
0xbfa20002, 0x84708970,
0xbfa00001, 0x84708a70,
0xb8fa1e06, 0x847a8a7a,
0x80707a70, 0xbef600ff,
0x01000000, 0xbef90080,
0xbefd0080, 0xbf800000,
0x8070ff70, 0x00000080,
0xbef90080, 0x7e040280,
0x807d907d, 0xbf0aff7d,
0x00000060, 0xbfa2ffbb,
0xbe804100, 0xbe824102,
0xbe844104, 0xbe864106,
0xbe884108, 0xbe8a410a,
0xbe8c410c, 0xbe8e410e,
0xd7610002, 0x0000f200,
0x80798179, 0xd7610002,
0x0000f201, 0x80798179,
......@@ -3814,130 +3850,97 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xd7610002, 0x0000f20a,
0x80798179, 0xd7610002,
0x0000f20b, 0x80798179,
0xd7610002, 0x0000f20c,
0x80798179, 0xd7610002,
0x0000f20d, 0x80798179,
0xd7610002, 0x0000f20e,
0x80798179, 0xd7610002,
0x0000f20f, 0x80798179,
0xbf06a079, 0xbfa10007,
0xc4068070, 0x008ce802,
0x00000000, 0x8070ff70,
0x00000080, 0xbef90080,
0x7e040280, 0x807d907d,
0xbf0aff7d, 0x00000060,
0xbfa2ffbb, 0xbe804100,
0xbe824102, 0xbe844104,
0xbe864106, 0xbe884108,
0xbe8a410a, 0xd7610002,
0x0000f200, 0x80798179,
0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
0x0000f202, 0x80798179,
0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
0x0000f204, 0x80798179,
0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
0x0000f206, 0x80798179,
0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
0x0000f208, 0x80798179,
0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
0x0000f20a, 0x80798179,
0xd7610002, 0x0000f20b,
0x80798179, 0xc4068070,
0x008ce802, 0x00000000,
0xbefe00c1, 0x857d9973,
0x8b7d817d, 0xbf06817d,
0xbfa20002, 0xbeff0080,
0xbfa00001, 0xbeff00c1,
0xb8fb4306, 0x8b7bc17b,
0xbfa10045, 0x8b7aff6d,
0x80000000, 0xbfa10042,
0x847b867b, 0x847b827b,
0xbef6007b, 0xb8f03b05,
0x80708170, 0xbf0d9973,
0xbfa20002, 0x84708970,
0xbfa00001, 0x84708a70,
0xb8fa1e06, 0x847a8a7a,
0x80707a70, 0x8070ff70,
0x00000200, 0x8070ff70,
0x00000080, 0xbef600ff,
0x01000000, 0xd71f0000,
0x000100c1, 0xd7200000,
0x000200c1, 0x16000084,
0x00000000, 0xbefe00c1,
0x857d9973, 0x8b7d817d,
0xbf06817d, 0xbefd0080,
0xbfa20013, 0xbe8300ff,
0x00000080, 0xbf800000,
0xbf800000, 0xbf800000,
0xd8d80000, 0x01000000,
0xbf890000, 0xc4068070,
0x008ce801, 0x00000000,
0x807d037d, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000080, 0xbf0a7b7d,
0xbfa2fff3, 0xbfa00012,
0xbe8300ff, 0x00000100,
0xbf06817d, 0xbfa20002,
0xbeff0080, 0xbfa00001,
0xbeff00c1, 0xb8fb4306,
0x8b7bc17b, 0xbfa10045,
0x8b7aff6d, 0x80000000,
0xbfa10042, 0x847b867b,
0x847b827b, 0xbef6007b,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
0x84708a70, 0xb8fa1e06,
0x847a8a7a, 0x80707a70,
0x8070ff70, 0x00000200,
0x8070ff70, 0x00000080,
0xbef600ff, 0x01000000,
0xd71f0000, 0x000100c1,
0xd7200000, 0x000200c1,
0x16000084, 0x857d9973,
0x8b7d817d, 0xbf06817d,
0xbefd0080, 0xbfa20013,
0xbe8300ff, 0x00000080,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf890000,
0xc4068070, 0x008ce801,
0x00000000, 0x807d037d,
0x80700370, 0xd5250000,
0x0001ff00, 0x00000100,
0x0001ff00, 0x00000080,
0xbf0a7b7d, 0xbfa2fff3,
0xbefe00c1, 0x857d9973,
0x8b7d817d, 0xbf06817d,
0xbfa20004, 0xbef000ff,
0x00000200, 0xbeff0080,
0xbfa00003, 0xbef000ff,
0x00000400, 0xbeff00c1,
0xb8fb3b05, 0x807b817b,
0x847b827b, 0x857d9973,
0x8b7d817d, 0xbf06817d,
0xbfa2001b, 0xbef600ff,
0x01000000, 0xbefd0084,
0xbf0a7b7d, 0xbfa10040,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xc4068070, 0x008ce800,
0x00000000, 0xc4068070,
0x008ce801, 0x00008000,
0xc4068070, 0x008ce802,
0x00010000, 0xc4068070,
0x008ce803, 0x00018000,
0x807d847d, 0x8070ff70,
0x00000200, 0xbf0a7b7d,
0xbfa2ffeb, 0xbfa0002a,
0xbfa00012, 0xbe8300ff,
0x00000100, 0xbf800000,
0xbf800000, 0xbf800000,
0xd8d80000, 0x01000000,
0xbf890000, 0xc4068070,
0x008ce801, 0x00000000,
0x807d037d, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000100, 0xbf0a7b7d,
0xbfa2fff3, 0xbefe00c1,
0x857d9973, 0x8b7d817d,
0xbf06817d, 0xbfa20004,
0xbef000ff, 0x00000200,
0xbeff0080, 0xbfa00003,
0xbef000ff, 0x00000400,
0xbeff00c1, 0xb8fb3b05,
0x807b817b, 0x847b827b,
0x857d9973, 0x8b7d817d,
0xbf06817d, 0xbfa2001b,
0xbef600ff, 0x01000000,
0xbefd0084, 0xbf0a7b7d,
0xbfa10015, 0x7e008700,
0xbfa10040, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xc4068070,
0x008ce800, 0x00000000,
0xc4068070, 0x008ce801,
0x00010000, 0xc4068070,
0x008ce802, 0x00020000,
0x00008000, 0xc4068070,
0x008ce802, 0x00010000,
0xc4068070, 0x008ce803,
0x00030000, 0x807d847d,
0x8070ff70, 0x00000400,
0x00018000, 0x807d847d,
0x8070ff70, 0x00000200,
0xbf0a7b7d, 0xbfa2ffeb,
0xb8fb1e06, 0x8b7bc17b,
0xbfa1000d, 0x847b837b,
0x807b7d7b, 0xbefe00c1,
0xbeff0080, 0x7e008700,
0xbfa0002a, 0xbef600ff,
0x01000000, 0xbefd0084,
0xbf0a7b7d, 0xbfa10015,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xc4068070, 0x008ce800,
0x00000000, 0x807d817d,
0x8070ff70, 0x00000080,
0xbf0a7b7d, 0xbfa2fff7,
0xbfa00159, 0xbef4007e,
0x8b75ff7f, 0x0000ffff,
0x8c75ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x10807fac, 0xb8f20742,
0x00000000, 0xc4068070,
0x008ce801, 0x00010000,
0xc4068070, 0x008ce802,
0x00020000, 0xc4068070,
0x008ce803, 0x00030000,
0x807d847d, 0x8070ff70,
0x00000400, 0xbf0a7b7d,
0xbfa2ffeb, 0xb8fb1e06,
0x8b7bc17b, 0xbfa1000d,
0x847b837b, 0x807b7d7b,
0xbefe00c1, 0xbeff0080,
0x7e008700, 0xc4068070,
0x008ce800, 0x00000000,
0x807d817d, 0x8070ff70,
0x00000080, 0xbf0a7b7d,
0xbfa2fff7, 0xbfa0016b,
0xbef4007e, 0x8b75ff7f,
0x0000ffff, 0x8c75ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x10807fac,
0xbef1007f, 0xb8f20742,
0x84729972, 0x8b6eff7f,
0x04000000, 0xbfa1003c,
0xbefe00c1, 0x857d9972,
......@@ -4064,49 +4067,58 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xb8ee1e06, 0x846e8a6e,
0x80786e78, 0x8078ff78,
0x00000200, 0xbef600ff,
0x01000000, 0xf4621bfa,
0x01000000, 0xbeff0071,
0xf4621bfa, 0xf0000000,
0x80788478, 0xf4621b3a,
0xf0000000, 0x80788478,
0xf4621b3a, 0xf0000000,
0x80788478, 0xf4621b7a,
0xf4621b7a, 0xf0000000,
0x80788478, 0xf4621c3a,
0xf0000000, 0x80788478,
0xf4621c3a, 0xf0000000,
0x80788478, 0xf4621c7a,
0xf4621c7a, 0xf0000000,
0x80788478, 0xf4621eba,
0xf0000000, 0x80788478,
0xf4621eba, 0xf0000000,
0x80788478, 0xf4621efa,
0xf4621efa, 0xf0000000,
0x80788478, 0xf4621e7a,
0xf0000000, 0x80788478,
0xf4621e7a, 0xf0000000,
0x80788478, 0xf4621cfa,
0xf4621cfa, 0xf0000000,
0x80788478, 0xf4621bba,
0xf0000000, 0x80788478,
0xbf89fc07, 0xb96ef814,
0xf4621bba, 0xf0000000,
0x80788478, 0xbf89fc07,
0xb96ef814, 0xf4621bba,
0xb96ef815, 0xf4621bba,
0xf0000000, 0x80788478,
0xbf89fc07, 0xb96ef815,
0xbf89fc07, 0xb96ef812,
0xf4621bba, 0xf0000000,
0x80788478, 0xbf89fc07,
0xb96ef812, 0xf4621bba,
0xb96ef813, 0x8b6eff7f,
0x04000000, 0xbfa1000d,
0x80788478, 0xf4621bba,
0xf0000000, 0x80788478,
0xbf89fc07, 0xb96ef813,
0xbe804ec2, 0xbf94fffe,
0xbefd006f, 0xbefe0070,
0xbeff0071, 0xb97bf811,
0xb973f801, 0xb8ee3b05,
0x806e816e, 0xbf0d9972,
0xbfa20002, 0x846e896e,
0xbfa00001, 0x846e8a6e,
0xb8ef1e06, 0x846f8a6f,
0x806e6f6e, 0x806eff6e,
0x00000200, 0x806e746e,
0x826f8075, 0x8b6fff6f,
0x0000ffff, 0xf4605c37,
0xf8000050, 0xf4605d37,
0xf8000060, 0xf4601e77,
0xf8000074, 0xbf89fc07,
0x8b6dff6d, 0x0000ffff,
0x8bfe7e7e, 0x8bea6a6a,
0xb97af804, 0xbe804a6c,
0xbfb00000, 0xbf9f0000,
0xbf89fc07, 0xbf0d806e,
0xbfa10006, 0x856e906e,
0x8b6e6e6e, 0xbfa10003,
0xbe804ec1, 0x816ec16e,
0xbfa0fffb, 0xbe804ec2,
0xbf94fffe, 0xbefd006f,
0xbefe0070, 0xbeff0071,
0xb97bf811, 0xb973f801,
0xb8ee3b05, 0x806e816e,
0xbf0d9972, 0xbfa20002,
0x846e896e, 0xbfa00001,
0x846e8a6e, 0xb8ef1e06,
0x846f8a6f, 0x806e6f6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
0x8b6fff6f, 0x0000ffff,
0xf4605c37, 0xf8000050,
0xf4605d37, 0xf8000060,
0xf4601e77, 0xf8000074,
0xbf89fc07, 0x8b6dff6d,
0x0000ffff, 0x8bfe7e7e,
0x8bea6a6a, 0xb97af804,
0xbe804a6c, 0xbfb00000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};
......@@ -154,6 +154,8 @@ var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK|SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK
var BARRIER_STATE_SIGNAL_OFFSET = 16
var BARRIER_STATE_VALID_OFFSET = 0
#endif
// bits [31:24] unused by SPI debug data
......@@ -227,6 +229,7 @@ var s_restore_buf_rsrc3 = ttmp11
var s_restore_size = ttmp6
var s_restore_ttmps_lo = s_restore_tmp
var s_restore_ttmps_hi = s_restore_alloc_size
var s_restore_spi_init_hi_save = s_restore_exec_hi
shader main
asic(DEFAULT)
......@@ -639,6 +642,10 @@ L_SAVE_HWREG:
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
s_get_barrier_state s_save_tmp, -1
s_wait_kmcnt (0)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
#endif
#if NO_SQC_STORE
......@@ -1001,6 +1008,11 @@ L_RESTORE:
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
#if ASIC_FAMILY >= CHIP_GFX12
// Save s_restore_spi_init_hi for later use.
s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
#endif
//determine it is wave32 or wave64
get_wave_size2(s_restore_size)
......@@ -1250,6 +1262,11 @@ L_RESTORE_HWREG:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
#if ASIC_FAMILY >= CHIP_GFX12
// Restore s_restore_spi_init_hi before the saved value gets clobbered.
s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
#endif
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
......@@ -1278,6 +1295,32 @@ L_RESTORE_HWREG:
s_waitcnt lgkmcnt(0)
s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
// Only the first wave needs to restore the workgroup barrier.
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
// Skip over WAVE_STATUS, since there is no state to restore from it
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
s_waitcnt lgkmcnt(0)
s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
// extract the saved signal count from s_restore_tmp
s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
// We need to call s_barrier_signal repeatedly to restore the signal
// count of the work group barrier. The member count is already
// initialized with the number of waves in the work group.
L_BARRIER_RESTORE_LOOP:
s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
s_barrier_signal -1
s_add_i32 s_restore_tmp, s_restore_tmp, -1
s_branch L_BARRIER_RESTORE_LOOP
L_SKIP_BARRIER_RESTORE:
// Make barrier and LDS state visible to all waves in the group.
s_barrier_signal -2
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment