Commit 5e025531 authored by Peng Ju Zhou's avatar Peng Ju Zhou Committed by Alex Deucher

drm/amdgpu: indirect register access for nv12 sriov

1. expand rlcg interface for gc & mmhub indirect access
2. add rlcg interface for no kiq

v2: squash in fix for gfx9 (Changfeng)
Signed-off-by: default avatarPeng Ju Zhou <PengJu.Zhou@amd.com>
Reviewed-by: default avatarEmily.Deng <Emily.Deng@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 5d238510
...@@ -490,7 +490,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, ...@@ -490,7 +490,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
adev->gfx.rlc.funcs && adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) { adev->gfx.rlc.funcs->is_rlcg_access_range) {
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
} else { } else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
} }
......
...@@ -127,7 +127,8 @@ struct amdgpu_rlc_funcs { ...@@ -127,7 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev); void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v); void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag);
u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 flag);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
}; };
......
...@@ -173,6 +173,11 @@ ...@@ -173,6 +173,11 @@
#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030
#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0
#define GFX_RLCG_GC_WRITE_OLD (0x8 << 28)
#define GFX_RLCG_GC_WRITE (0x0 << 28)
#define GFX_RLCG_GC_READ (0x1 << 28)
#define GFX_RLCG_MMHUB_WRITE (0x2 << 28)
MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin");
...@@ -1418,38 +1423,127 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = ...@@ -1418,38 +1423,127 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
}; };
static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write)
{
/* always programed by rlcg, only for gc */
if (offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI) ||
offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO) ||
offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH) ||
offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL) ||
offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX) ||
offset == SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)) {
if (!amdgpu_sriov_reg_indirect_gc(adev))
*flag = GFX_RLCG_GC_WRITE_OLD;
else
*flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
return true;
}
/* currently support gc read/write, mmhub write */
if (offset >= SOC15_REG_OFFSET(GC, 0, mmSDMA0_DEC_START) &&
offset <= SOC15_REG_OFFSET(GC, 0, mmRLC_GTS_OFFSET_MSB)) {
if (amdgpu_sriov_reg_indirect_gc(adev))
*flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ;
else
return false;
} else {
if (amdgpu_sriov_reg_indirect_mmhub(adev))
*flag = GFX_RLCG_MMHUB_WRITE;
else
return false;
}
return true;
}
static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)
{ {
static void *scratch_reg0; static void *scratch_reg0;
static void *scratch_reg1; static void *scratch_reg1;
static void *scratch_reg2;
static void *scratch_reg3;
static void *spare_int; static void *spare_int;
static uint32_t grbm_cntl;
static uint32_t grbm_idx;
uint32_t i = 0; uint32_t i = 0;
uint32_t retries = 50000; uint32_t retries = 50000;
u32 ret = 0;
scratch_reg0 = adev->rmmio +
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4;
scratch_reg1 = adev->rmmio +
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1) * 4;
scratch_reg2 = adev->rmmio +
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4;
scratch_reg3 = adev->rmmio +
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4;
spare_int = adev->rmmio +
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
if (offset == grbm_cntl || offset == grbm_idx) {
if (offset == grbm_cntl)
writel(v, scratch_reg2);
else if (offset == grbm_idx)
writel(v, scratch_reg3);
writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
} else {
writel(v, scratch_reg0);
writel(offset | flag, scratch_reg1);
writel(1, spare_int);
for (i = 0; i < retries; i++) {
u32 tmp;
tmp = readl(scratch_reg1);
if (!(tmp & flag))
break;
scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; udelay(10);
scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; }
spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
if (amdgpu_sriov_runtime(adev)) { if (i >= retries)
pr_err("shouldn't call rlcg write register during runtime\n"); pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
return;
} }
writel(v, scratch_reg0); ret = readl(scratch_reg0);
writel(offset | 0x80000000, scratch_reg1);
writel(1, spare_int);
for (i = 0; i < retries; i++) {
u32 tmp;
tmp = readl(scratch_reg1); return ret;
if (!(tmp & 0x80000000)) }
break;
udelay(10); static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 flag)
{
uint32_t rlcg_flag;
if (amdgpu_sriov_fullaccess(adev) &&
gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 1)) {
gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag);
return;
} }
if (flag & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, value);
else
WREG32(offset, value);
}
static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 flag)
{
uint32_t rlcg_flag;
if (i >= retries) if (amdgpu_sriov_fullaccess(adev) &&
pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 0))
return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag);
if (flag & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
else
return RREG32(offset);
return 0;
} }
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
...@@ -7884,6 +7978,7 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { ...@@ -7884,6 +7978,7 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.start = gfx_v10_0_rlc_start, .start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid, .update_spm_vmid = gfx_v10_0_update_spm_vmid,
.rlcg_wreg = gfx_v10_rlcg_wreg, .rlcg_wreg = gfx_v10_rlcg_wreg,
.rlcg_rreg = gfx_v10_rlcg_rreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
}; };
......
...@@ -734,7 +734,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = ...@@ -734,7 +734,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
}; };
static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{ {
static void *scratch_reg0; static void *scratch_reg0;
static void *scratch_reg1; static void *scratch_reg1;
...@@ -787,6 +787,20 @@ static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) ...@@ -787,6 +787,20 @@ static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
} }
static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
if (amdgpu_sriov_fullaccess(adev)) {
gfx_v9_0_rlcg_rw(adev, offset, v, flag);
return;
}
if (flag & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, v);
else
WREG32(offset, v);
}
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
......
...@@ -77,27 +77,11 @@ ...@@ -77,27 +77,11 @@
}) })
#define WREG32_RLC(reg, value) \ #define WREG32_RLC(reg, value) \
do { \ do { \
if (amdgpu_sriov_fullaccess(adev)) { \ if (adev->gfx.rlc.funcs->rlcg_wreg) \
uint32_t i = 0; \ adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, 0); \
uint32_t retries = 50000; \ else \
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ WREG32(reg, value); \
uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
WREG32(r0, value); \
WREG32(r1, (reg | 0x80000000)); \
WREG32(spare_int, 0x1); \
for (i = 0; i < retries; i++) { \
u32 tmp = RREG32(r1); \
if (!(tmp & 0x80000000)) \
break; \
udelay(10); \
} \
if (i >= retries) \
pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
} else { \
WREG32(reg, value); \
} \
} while (0) } while (0)
#define WREG32_RLC_EX(prefix, reg, value) \ #define WREG32_RLC_EX(prefix, reg, value) \
...@@ -125,23 +109,24 @@ ...@@ -125,23 +109,24 @@
} while (0) } while (0)
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \ WREG32_RLC((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value)
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (amdgpu_sriov_fullaccess(adev)) { \ #define RREG32_RLC(reg) \
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ (adev->gfx.rlc.funcs->rlcg_rreg ? \
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, 0) : RREG32(reg))
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \ #define WREG32_RLC_NO_KIQ(reg, value) \
if (target_reg == grbm_cntl) \ do { \
WREG32(r2, value); \ if (adev->gfx.rlc.funcs->rlcg_wreg) \
else if (target_reg == grbm_idx) \ adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, AMDGPU_REGS_NO_KIQ); \
WREG32(r3, value); \ else \
WREG32(target_reg, value); \ WREG32_NO_KIQ(reg, value); \
} else { \
WREG32(target_reg, value); \
} \
} while (0) } while (0)
#define RREG32_RLC_NO_KIQ(reg) \
(adev->gfx.rlc.funcs->rlcg_rreg ? \
adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, AMDGPU_REGS_NO_KIQ) : RREG32_NO_KIQ(reg))
#define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \ #define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
do { \ do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
...@@ -160,10 +145,13 @@ ...@@ -160,10 +145,13 @@
} \ } \
} while (0) } while (0)
#define RREG32_SOC15_RLC(ip, inst, reg) \
RREG32_RLC(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
#define WREG32_SOC15_RLC(ip, inst, reg, value) \ #define WREG32_SOC15_RLC(ip, inst, reg, value) \
do { \ do { \
uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\
WREG32_RLC(target_reg, value); \ WREG32_RLC(target_reg, value); \
} while (0) } while (0)
#define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \ #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
...@@ -173,11 +161,14 @@ ...@@ -173,11 +161,14 @@
} while (0) } while (0)
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ #define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
(RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ (RREG32_RLC(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
& ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ #define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
RREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset))
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment