Commit 22c775ce authored by Alex Deucher

drm/radeon: implement clock and power gating for CIK (v3)

Only the APUs support power gating.

v2: disable cgcg for now
v3: workaround hw issue in mgcg
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 1fd11777
This diff is collapsed.
...@@ -299,6 +299,10 @@ ...@@ -299,6 +299,10 @@
#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C #define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580 #define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580
#define VM_L2_CG 0x15c0
#define MC_CG_ENABLE (1 << 18)
#define MC_LS_ENABLE (1 << 19)
#define MC_SHARED_CHMAP 0x2004 #define MC_SHARED_CHMAP 0x2004
#define NOOFCHAN_SHIFT 12 #define NOOFCHAN_SHIFT 12
#define NOOFCHAN_MASK 0x0000f000 #define NOOFCHAN_MASK 0x0000f000
...@@ -328,6 +332,17 @@ ...@@ -328,6 +332,17 @@
#define MC_SHARED_BLACKOUT_CNTL 0x20ac #define MC_SHARED_BLACKOUT_CNTL 0x20ac
#define MC_HUB_MISC_HUB_CG 0x20b8
#define MC_HUB_MISC_VM_CG 0x20bc
#define MC_HUB_MISC_SIP_CG 0x20c0
#define MC_XPB_CLK_GAT 0x2478
#define MC_CITF_MISC_RD_CG 0x2648
#define MC_CITF_MISC_WR_CG 0x264c
#define MC_CITF_MISC_VM_CG 0x2650
#define MC_ARB_RAMCFG 0x2760 #define MC_ARB_RAMCFG 0x2760
#define NOOFBANK_SHIFT 0 #define NOOFBANK_SHIFT 0
#define NOOFBANK_MASK 0x00000003 #define NOOFBANK_MASK 0x00000003
...@@ -357,6 +372,7 @@ ...@@ -357,6 +372,7 @@
#define MC_SEQ_IO_DEBUG_DATA 0x2a48 #define MC_SEQ_IO_DEBUG_DATA 0x2a48
#define HDP_HOST_PATH_CNTL 0x2C00 #define HDP_HOST_PATH_CNTL 0x2C00
#define CLOCK_GATING_DIS (1 << 23)
#define HDP_NONSURFACE_BASE 0x2C04 #define HDP_NONSURFACE_BASE 0x2C04
#define HDP_NONSURFACE_INFO 0x2C08 #define HDP_NONSURFACE_INFO 0x2C08
#define HDP_NONSURFACE_SIZE 0x2C0C #define HDP_NONSURFACE_SIZE 0x2C0C
...@@ -364,6 +380,10 @@ ...@@ -364,6 +380,10 @@
#define HDP_ADDR_CONFIG 0x2F48 #define HDP_ADDR_CONFIG 0x2F48
#define HDP_MISC_CNTL 0x2F4C #define HDP_MISC_CNTL 0x2F4C
#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) #define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
#define HDP_MEM_POWER_LS 0x2F50
#define HDP_LS_ENABLE (1 << 0)
#define ATC_MISC_CG 0x3350
#define IH_RB_CNTL 0x3e00 #define IH_RB_CNTL 0x3e00
# define IH_RB_ENABLE (1 << 0) # define IH_RB_ENABLE (1 << 0)
...@@ -631,6 +651,9 @@ ...@@ -631,6 +651,9 @@
#define CP_RB0_RPTR 0x8700 #define CP_RB0_RPTR 0x8700
#define CP_RB_WPTR_DELAY 0x8704 #define CP_RB_WPTR_DELAY 0x8704
#define CP_RB_WPTR_POLL_CNTL 0x8708
#define IDLE_POLL_COUNT(x) ((x) << 16)
#define IDLE_POLL_COUNT_MASK (0xffff << 16)
#define CP_MEQ_THRESHOLDS 0x8764 #define CP_MEQ_THRESHOLDS 0x8764
#define MEQ1_START(x) ((x) << 0) #define MEQ1_START(x) ((x) << 0)
...@@ -857,6 +880,9 @@ ...@@ -857,6 +880,9 @@
# define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID1_INT_STAT (1 << 30)
# define CP_RINGID0_INT_STAT (1 << 31) # define CP_RINGID0_INT_STAT (1 << 31)
#define CP_MEM_SLP_CNTL 0xC1E4
# define CP_MEM_LS_EN (1 << 0)
#define CP_CPF_DEBUG 0xC200 #define CP_CPF_DEBUG 0xC200
#define CP_PQ_WPTR_POLL_CNTL 0xC20C #define CP_PQ_WPTR_POLL_CNTL 0xC20C
...@@ -902,6 +928,9 @@ ...@@ -902,6 +928,9 @@
#define RLC_MC_CNTL 0xC30C #define RLC_MC_CNTL 0xC30C
#define RLC_MEM_SLP_CNTL 0xC318
# define RLC_MEM_LS_EN (1 << 0)
#define RLC_LB_CNTR_MAX 0xC348 #define RLC_LB_CNTR_MAX 0xC348
#define RLC_LB_CNTL 0xC364 #define RLC_LB_CNTL 0xC364
...@@ -910,7 +939,9 @@ ...@@ -910,7 +939,9 @@
#define RLC_LB_CNTR_INIT 0xC36C #define RLC_LB_CNTR_INIT 0xC36C
#define RLC_SAVE_AND_RESTORE_BASE 0xC374 #define RLC_SAVE_AND_RESTORE_BASE 0xC374
#define RLC_DRIVER_DMA_STATUS 0xC378 #define RLC_DRIVER_DMA_STATUS 0xC378 /* dGPU */
#define RLC_CP_TABLE_RESTORE 0xC378 /* APU */
#define RLC_PG_DELAY_2 0xC37C
#define RLC_GPM_UCODE_ADDR 0xC388 #define RLC_GPM_UCODE_ADDR 0xC388
#define RLC_GPM_UCODE_DATA 0xC38C #define RLC_GPM_UCODE_DATA 0xC38C
...@@ -919,12 +950,50 @@ ...@@ -919,12 +950,50 @@
#define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC398 #define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC398
#define RLC_UCODE_CNTL 0xC39C #define RLC_UCODE_CNTL 0xC39C
#define RLC_GPM_STAT 0xC400
# define RLC_GPM_BUSY (1 << 0)
#define RLC_PG_CNTL 0xC40C
# define GFX_PG_ENABLE (1 << 0)
# define GFX_PG_SRC (1 << 1)
# define DYN_PER_CU_PG_ENABLE (1 << 2)
# define STATIC_PER_CU_PG_ENABLE (1 << 3)
# define DISABLE_GDS_PG (1 << 13)
# define DISABLE_CP_PG (1 << 15)
# define SMU_CLK_SLOWDOWN_ON_PU_ENABLE (1 << 17)
# define SMU_CLK_SLOWDOWN_ON_PD_ENABLE (1 << 18)
#define RLC_CGTT_MGCG_OVERRIDE 0xC420
#define RLC_CGCG_CGLS_CTRL 0xC424 #define RLC_CGCG_CGLS_CTRL 0xC424
# define CGCG_EN (1 << 0)
# define CGLS_EN (1 << 1)
#define RLC_PG_DELAY 0xC434
#define RLC_LB_INIT_CU_MASK 0xC43C #define RLC_LB_INIT_CU_MASK 0xC43C
#define RLC_LB_PARAMS 0xC444 #define RLC_LB_PARAMS 0xC444
#define RLC_PG_AO_CU_MASK 0xC44C
#define RLC_MAX_PG_CU 0xC450
# define MAX_PU_CU(x) ((x) << 0)
# define MAX_PU_CU_MASK (0xff << 0)
#define RLC_AUTO_PG_CTRL 0xC454
# define AUTO_PG_EN (1 << 0)
# define GRBM_REG_SGIT(x) ((x) << 3)
# define GRBM_REG_SGIT_MASK (0xffff << 3)
#define RLC_SERDES_WR_CU_MASTER_MASK 0xC474
#define RLC_SERDES_WR_NONCU_MASTER_MASK 0xC478
#define RLC_SERDES_WR_CTRL 0xC47C
#define BPM_ADDR(x) ((x) << 0)
#define BPM_ADDR_MASK (0xff << 0)
#define CGLS_ENABLE (1 << 16)
#define CGCG_OVERRIDE_0 (1 << 20)
#define MGCG_OVERRIDE_0 (1 << 22)
#define MGCG_OVERRIDE_1 (1 << 23)
#define RLC_SERDES_CU_MASTER_BUSY 0xC484 #define RLC_SERDES_CU_MASTER_BUSY 0xC484
#define RLC_SERDES_NONCU_MASTER_BUSY 0xC488 #define RLC_SERDES_NONCU_MASTER_BUSY 0xC488
# define SE_MASTER_BUSY_MASK 0x0000ffff # define SE_MASTER_BUSY_MASK 0x0000ffff
...@@ -979,6 +1048,8 @@ ...@@ -979,6 +1048,8 @@
#define MQD_VMID(x) ((x) << 0) #define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0) #define MQD_VMID_MASK (0xf << 0)
#define DB_RENDER_CONTROL 0x28000
#define PA_SC_RASTER_CONFIG 0x28350 #define PA_SC_RASTER_CONFIG 0x28350
# define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_0 0
# define RASTER_CONFIG_RB_MAP_1 1 # define RASTER_CONFIG_RB_MAP_1 1
...@@ -1072,6 +1143,16 @@ ...@@ -1072,6 +1143,16 @@
#define CP_PERFMON_CNTL 0x36020 #define CP_PERFMON_CNTL 0x36020
#define CGTS_SM_CTRL_REG 0x3c000
#define SM_MODE(x) ((x) << 17)
#define SM_MODE_MASK (0x7 << 17)
#define SM_MODE_ENABLE (1 << 20)
#define CGTS_OVERRIDE (1 << 21)
#define CGTS_LS_OVERRIDE (1 << 22)
#define ON_MONITOR_ADD_EN (1 << 23)
#define ON_MONITOR_ADD(x) ((x) << 24)
#define ON_MONITOR_ADD_MASK (0xff << 24)
#define CGTS_TCC_DISABLE 0x3c00c #define CGTS_TCC_DISABLE 0x3c00c
#define CGTS_USER_TCC_DISABLE 0x3c010 #define CGTS_USER_TCC_DISABLE 0x3c010
#define TCC_DISABLE_MASK 0xFFFF0000 #define TCC_DISABLE_MASK 0xFFFF0000
...@@ -1304,6 +1385,8 @@ ...@@ -1304,6 +1385,8 @@
#define SDMA0_UCODE_ADDR 0xD000 #define SDMA0_UCODE_ADDR 0xD000
#define SDMA0_UCODE_DATA 0xD004 #define SDMA0_UCODE_DATA 0xD004
#define SDMA0_POWER_CNTL 0xD008
#define SDMA0_CLK_CTRL 0xD00C
#define SDMA0_CNTL 0xD010 #define SDMA0_CNTL 0xD010
# define TRAP_ENABLE (1 << 0) # define TRAP_ENABLE (1 << 0)
...@@ -1428,6 +1511,13 @@ ...@@ -1428,6 +1511,13 @@
#define UVD_RBC_RB_RPTR 0xf690 #define UVD_RBC_RB_RPTR 0xf690
#define UVD_RBC_RB_WPTR 0xf694 #define UVD_RBC_RB_WPTR 0xf694
#define UVD_CGC_CTRL 0xF4B0
# define DCM (1 << 0)
# define CG_DT(x) ((x) << 2)
# define CG_DT_MASK (0xf << 2)
# define CLK_OD(x) ((x) << 6)
# define CLK_OD_MASK (0x1f << 6)
/* UVD clocks */ /* UVD clocks */
#define CG_DCLK_CNTL 0xC050009C #define CG_DCLK_CNTL 0xC050009C
...@@ -1438,4 +1528,7 @@ ...@@ -1438,4 +1528,7 @@
#define CG_VCLK_CNTL 0xC05000A4 #define CG_VCLK_CNTL 0xC05000A4
#define CG_VCLK_STATUS 0xC05000A8 #define CG_VCLK_STATUS 0xC05000A8
/* UVD CTX indirect */
#define UVD_CGC_MEM_CTRL 0xC0
#endif #endif
This diff is collapsed.
...@@ -140,6 +140,7 @@ extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev, ...@@ -140,6 +140,7 @@ extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
int ring, u32 cp_int_cntl); int ring, u32 cp_int_cntl);
extern void cayman_vm_decode_fault(struct radeon_device *rdev, extern void cayman_vm_decode_fault(struct radeon_device *rdev,
u32 status, u32 addr); u32 status, u32 addr);
void cik_init_cp_pg_table(struct radeon_device *rdev);
static const u32 evergreen_golden_registers[] = static const u32 evergreen_golden_registers[] =
{ {
...@@ -3893,8 +3894,22 @@ void sumo_rlc_fini(struct radeon_device *rdev) ...@@ -3893,8 +3894,22 @@ void sumo_rlc_fini(struct radeon_device *rdev)
radeon_bo_unref(&rdev->rlc.clear_state_obj); radeon_bo_unref(&rdev->rlc.clear_state_obj);
rdev->rlc.clear_state_obj = NULL; rdev->rlc.clear_state_obj = NULL;
} }
/* cp table block */
if (rdev->rlc.cp_table_obj) {
r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
if (unlikely(r != 0))
dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
radeon_bo_unpin(rdev->rlc.cp_table_obj);
radeon_bo_unreserve(rdev->rlc.cp_table_obj);
radeon_bo_unref(&rdev->rlc.cp_table_obj);
rdev->rlc.cp_table_obj = NULL;
}
} }
#define CP_ME_TABLE_SIZE 96
int sumo_rlc_init(struct radeon_device *rdev) int sumo_rlc_init(struct radeon_device *rdev)
{ {
const u32 *src_ptr; const u32 *src_ptr;
...@@ -3980,9 +3995,10 @@ int sumo_rlc_init(struct radeon_device *rdev) ...@@ -3980,9 +3995,10 @@ int sumo_rlc_init(struct radeon_device *rdev)
} }
reg_list_blk_index = (3 * reg_list_num + 2); reg_list_blk_index = (3 * reg_list_num + 2);
dws += reg_list_blk_index; dws += reg_list_blk_index;
rdev->rlc.clear_state_size = dws;
if (rdev->rlc.clear_state_obj == NULL) { if (rdev->rlc.clear_state_obj == NULL) {
r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, r = radeon_bo_create(rdev, rdev->rlc.clear_state_size * 4, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
if (r) { if (r) {
dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
...@@ -4046,6 +4062,45 @@ int sumo_rlc_init(struct radeon_device *rdev) ...@@ -4046,6 +4062,45 @@ int sumo_rlc_init(struct radeon_device *rdev)
radeon_bo_unreserve(rdev->rlc.clear_state_obj); radeon_bo_unreserve(rdev->rlc.clear_state_obj);
} }
if (rdev->rlc.cp_table_size) {
if (rdev->rlc.cp_table_obj == NULL) {
r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
sumo_rlc_fini(rdev);
return r;
}
}
r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
if (unlikely(r != 0)) {
dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
sumo_rlc_fini(rdev);
return r;
}
r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM,
&rdev->rlc.cp_table_gpu_addr);
if (r) {
radeon_bo_unreserve(rdev->rlc.cp_table_obj);
dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r);
sumo_rlc_fini(rdev);
return r;
}
r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void **)&rdev->rlc.cp_table_ptr);
if (r) {
dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r);
sumo_rlc_fini(rdev);
return r;
}
cik_init_cp_pg_table(rdev);
radeon_bo_kunmap(rdev->rlc.cp_table_obj);
radeon_bo_unreserve(rdev->rlc.cp_table_obj);
}
return 0; return 0;
} }
......
...@@ -152,6 +152,14 @@ extern int radeon_aspm; ...@@ -152,6 +152,14 @@ extern int radeon_aspm;
#define RADEON_RESET_MC (1 << 10) #define RADEON_RESET_MC (1 << 10)
#define RADEON_RESET_DISPLAY (1 << 11) #define RADEON_RESET_DISPLAY (1 << 11)
/* CG block flags */
#define RADEON_CG_BLOCK_GFX (1 << 0)
#define RADEON_CG_BLOCK_MC (1 << 1)
#define RADEON_CG_BLOCK_SDMA (1 << 2)
#define RADEON_CG_BLOCK_UVD (1 << 3)
#define RADEON_CG_BLOCK_VCE (1 << 4)
#define RADEON_CG_BLOCK_HDP (1 << 5)
/* max cursor sizes (in pixels) */ /* max cursor sizes (in pixels) */
#define CURSOR_WIDTH 64 #define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64 #define CURSOR_HEIGHT 64
...@@ -861,6 +869,12 @@ struct radeon_rlc { ...@@ -861,6 +869,12 @@ struct radeon_rlc {
uint64_t clear_state_gpu_addr; uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr; volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data; const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct radeon_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
}; };
int radeon_ib_get(struct radeon_device *rdev, int ring, int radeon_ib_get(struct radeon_device *rdev, int ring,
......
...@@ -2780,6 +2780,7 @@ int radeon_asic_init(struct radeon_device *rdev) ...@@ -2780,6 +2780,7 @@ int radeon_asic_init(struct radeon_device *rdev)
case CHIP_BONAIRE: case CHIP_BONAIRE:
rdev->asic = &ci_asic; rdev->asic = &ci_asic;
rdev->num_crtc = 6; rdev->num_crtc = 6;
rdev->has_uvd = true;
break; break;
case CHIP_KAVERI: case CHIP_KAVERI:
case CHIP_KABINI: case CHIP_KABINI:
...@@ -2789,6 +2790,7 @@ int radeon_asic_init(struct radeon_device *rdev) ...@@ -2789,6 +2790,7 @@ int radeon_asic_init(struct radeon_device *rdev)
rdev->num_crtc = 4; rdev->num_crtc = 4;
else else
rdev->num_crtc = 2; rdev->num_crtc = 2;
rdev->has_uvd = true;
break; break;
default: default:
/* FIXME: not supported yet */ /* FIXME: not supported yet */
......
...@@ -4901,7 +4901,7 @@ static void si_set_uvd_dcm(struct radeon_device *rdev, ...@@ -4901,7 +4901,7 @@ static void si_set_uvd_dcm(struct radeon_device *rdev,
WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2); WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
} }
static void si_init_uvd_internal_cg(struct radeon_device *rdev) void si_init_uvd_internal_cg(struct radeon_device *rdev)
{ {
bool hw_mode = true; bool hw_mode = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment