Commit af66706a authored by Konrad Dybcio, committed by Rob Clark

drm/msm/a6xx: Add skeleton A7xx support

A7xx GPUs are - from the kernel's POV anyway - basically another generation
of A6xx. They build upon the A650/A660-family advancements, skipping some
register writes (presumably more values are preset correctly on reset),
adding some new ones and changing others.

One notable difference is the introduction of a second shadow, called BV.
To handle this with the current code, allocate it right after the current
RPTR shadow.
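
As an illustration of that layout, here is a minimal standalone sketch (all identifiers below are hypothetical, invented for explanation; they are not the driver's actual names): one RPTR slot per ring, with the BV slots packed immediately after in the same buffer, so a single allocation serves both.

	#include <stdint.h>

	/* Illustrative only: NR_RINGS and these helpers do not exist in the driver */
	#define NR_RINGS 4

	struct shadow_layout {
		uint32_t rptr[NR_RINGS];    /* existing per-ring RPTR shadow */
		uint32_t bv_rptr[NR_RINGS]; /* new per-ring BV shadow, right after */
	};

	/* GPU-visible address of a ring's RPTR shadow slot */
	static inline uint64_t rptr_shadow_iova(uint64_t shadow_iova, int ring_id)
	{
		return shadow_iova + ring_id * sizeof(uint32_t);
	}

	/* GPU-visible address of a ring's BV shadow slot, placed after all RPTR slots */
	static inline uint64_t bv_shadow_iova(uint64_t shadow_iova, int ring_id)
	{
		return shadow_iova + (NR_RINGS + ring_id) * sizeof(uint32_t);
	}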

BV handling and .submit are mostly based on Jonathan Marek's work.

All A7xx GPUs are assumed to have a GMU.
A702 is not an A7xx-class GPU, it's a weird forked A610.

Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-QRD
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> # sm8450
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/559285/
Signed-off-by: Rob Clark <robdclark@chromium.org>
parent 23eeae60
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/interconnect.h>
 #include <linux/of_platform.h>
@@ -202,9 +203,10 @@ int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu)
 static int a6xx_gmu_start(struct a6xx_gmu *gmu)
 {
+	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	u32 mask, reset_val, val;
 	int ret;
-	u32 val;
-	u32 mask, reset_val;
 
 	val = gmu_read(gmu, REG_A6XX_GMU_CM3_DTCM_START + 0xff8);
 	if (val <= 0x20010004) {
@@ -220,7 +222,11 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
 	/* Set the log wptr index
 	 * note: downstream saves the value in poweroff and restores it here
 	 */
-	gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP, 0);
+	if (adreno_is_a7xx(adreno_gpu))
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_9, 0);
+	else
+		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP, 0);
 
 	gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 0);
@@ -520,7 +526,9 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 	if (IS_ERR(pdcptr))
 		goto err;
 
-	if (adreno_is_a650(adreno_gpu) || adreno_is_a660_family(adreno_gpu))
+	if (adreno_is_a650(adreno_gpu) ||
+	    adreno_is_a660_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu))
 		pdc_in_aop = true;
 	else if (adreno_is_a618(adreno_gpu) || adreno_is_a640_family(adreno_gpu))
 		pdc_address_offset = 0x30090;
@@ -552,7 +560,8 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 	gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514);
 
 	/* Load RSC sequencer uCode for sleep and wakeup */
-	if (adreno_is_a650_family(adreno_gpu)) {
+	if (adreno_is_a650_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu)) {
 		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xeaaae5a0);
 		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xe1a1ebab);
 		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e0a581);
@@ -637,11 +646,18 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 /* Set up the idle state for the GMU */
 static void a6xx_gmu_power_config(struct a6xx_gmu *gmu)
 {
+	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+
 	/* Disable GMU WB/RB buffer */
 	gmu_write(gmu, REG_A6XX_GMU_SYS_BUS_CONFIG, 0x1);
 	gmu_write(gmu, REG_A6XX_GMU_ICACHE_CONFIG, 0x1);
 	gmu_write(gmu, REG_A6XX_GMU_DCACHE_CONFIG, 0x1);
 
+	/* A7xx knows better by default! */
+	if (adreno_is_a7xx(adreno_gpu))
+		return;
+
 	gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0x9c40400);
 
 	switch (gmu->idle_level) {
@@ -698,7 +714,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
 	u32 itcm_base = 0x00000000;
 	u32 dtcm_base = 0x00040000;
 
-	if (adreno_is_a650_family(adreno_gpu))
+	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
 		dtcm_base = 0x10004000;
 
 	if (gmu->legacy) {
@@ -747,14 +763,22 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
 {
 	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	u32 fence_range_lower, fence_range_upper;
 	int ret;
 	u32 chipid;
 
-	if (adreno_is_a650_family(adreno_gpu)) {
+	/* Vote veto for FAL10 */
+	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu)) {
 		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 1);
 		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 1);
 	}
 
+	/* Turn on TCM (Tightly Coupled Memory) retention */
+	if (adreno_is_a7xx(adreno_gpu))
+		a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL, 1);
+	else
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
+
 	if (state == GMU_WARM_BOOT) {
 		ret = a6xx_rpmh_start(gmu);
 		if (ret)
@@ -764,9 +788,6 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
 			 "GMU firmware is not loaded\n"))
 			return -ENOENT;
 
-		/* Turn on register retention */
-		gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
-
 		ret = a6xx_rpmh_start(gmu);
 		if (ret)
 			return ret;
@@ -776,6 +797,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
 			return ret;
 	}
 
+	/* Clear init result to make sure we are getting a fresh value */
 	gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0);
 	gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02);
@@ -783,8 +805,18 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
 	gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi.iova);
 	gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1);
 
+	if (adreno_is_a7xx(adreno_gpu)) {
+		fence_range_upper = 0x32;
+		fence_range_lower = 0x8a0;
+	} else {
+		fence_range_upper = 0xa;
+		fence_range_lower = 0xa0;
+	}
+
 	gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_RANGE_0,
-		  (1 << 31) | (0xa << 18) | (0xa0));
+		  BIT(31) |
+		  FIELD_PREP(GENMASK(30, 18), fence_range_upper) |
+		  FIELD_PREP(GENMASK(17, 0), fence_range_lower));
 
 	/*
 	 * Snapshots toggle the NMI bit which will result in a jump to the NMI
@@ -803,10 +835,17 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
 	chipid |= (adreno_gpu->chip_id << 4) & 0xf000; /* minor */
 	chipid |= (adreno_gpu->chip_id << 8) & 0x0f00; /* patchid */
 
-	gmu_write(gmu, REG_A6XX_GMU_HFI_SFR_ADDR, chipid);
+	if (adreno_is_a7xx(adreno_gpu)) {
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_10, chipid);
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_8,
+			  (gmu->log.iova & GENMASK(31, 12)) |
+			  ((gmu->log.size / SZ_4K - 1) & GENMASK(7, 0)));
+	} else {
+		gmu_write(gmu, REG_A6XX_GMU_HFI_SFR_ADDR, chipid);
 
-	gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG,
-		  gmu->log.iova | (gmu->log.size / SZ_4K - 1));
+		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG,
+			  gmu->log.iova | (gmu->log.size / SZ_4K - 1));
+	}
 
 	/* Set up the lowest idle level on the GMU */
 	a6xx_gmu_power_config(gmu);
@@ -980,15 +1019,19 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 	enable_irq(gmu->gmu_irq);
 
 	/* Check to see if we are doing a cold or warm boot */
-	status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ?
-		GMU_WARM_BOOT : GMU_COLD_BOOT;
-
-	/*
-	 * Warm boot path does not work on newer GPUs
-	 * Presumably this is because icache/dcache regions must be restored
-	 */
-	if (!gmu->legacy)
-		status = GMU_COLD_BOOT;
+	if (adreno_is_a7xx(adreno_gpu)) {
+		status = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL) == 1 ?
+			GMU_WARM_BOOT : GMU_COLD_BOOT;
+	} else if (gmu->legacy) {
+		status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ?
+			GMU_WARM_BOOT : GMU_COLD_BOOT;
+	} else {
+		/*
+		 * Warm boot path does not work on newer A6xx GPUs
+		 * Presumably this is because icache/dcache regions must be restored
+		 */
+		status = GMU_COLD_BOOT;
+	}
 
 	ret = a6xx_gmu_fw_start(gmu, status);
 	if (ret)
@@ -1600,7 +1643,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 	 * are otherwise unused by a660.
 	 */
 	gmu->dummy.size = SZ_4K;
-	if (adreno_is_a660_family(adreno_gpu)) {
+	if (adreno_is_a660_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu)) {
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
 					    0x60400000, "debug");
 		if (ret)
@@ -1616,7 +1660,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 		goto err_memory;
 
 	/* Note that a650 family also includes a660 family: */
-	if (adreno_is_a650_family(adreno_gpu)) {
+	if (adreno_is_a650_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu)) {
 		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
 			SZ_16M - SZ_16K, 0x04000, "icache");
 		if (ret)
@@ -1664,7 +1709,8 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
 			goto err_memory;
 	}
 
-	if (adreno_is_a650_family(adreno_gpu)) {
+	if (adreno_is_a650_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu)) {
 		gmu->rscc = a6xx_gmu_get_mmio(pdev, "rscc");
 		if (IS_ERR(gmu->rscc)) {
			ret = -ENODEV;
...
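As a side note on the AHB fence range hunk above: a quick standalone check that the new FIELD_PREP form encodes exactly the same A6xx value as the old open-coded write. The BIT/GENMASK/FIELD_PREP definitions below are minimal userspace stand-ins for illustration (32-bit only); the kernel gets the real ones from linux/bits.h and linux/bitfield.h.

	#include <assert.h>
	#include <stdint.h>

	/* Userspace stand-ins for the kernel helpers, 32-bit fields only */
	#define BIT(n)        (1u << (n))
	#define GENMASK(h, l) (((~0u) >> (31 - (h))) & ~((1u << (l)) - 1u))
	#define FIELD_PREP(mask, val) (((uint32_t)(val) << __builtin_ctz(mask)) & (mask))

	int main(void)
	{
		uint32_t old = (1u << 31) | (0xa << 18) | (0xa0);
		uint32_t new = BIT(31) |
			       FIELD_PREP(GENMASK(30, 18), 0xa) |
			       FIELD_PREP(GENMASK(17, 0), 0xa0);

		assert(old == new); /* both are 0x802800a0 */
		return 0;
	}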
[One file's diff is collapsed and not shown here.]
@@ -567,6 +567,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
 		ring->cur = ring->start;
 		ring->next = ring->start;
 		ring->memptrs->rptr = 0;
+		ring->memptrs->bv_fence = ring->fctx->completed_fence;
 
 		/* Detect and clean up an impossible fence, ie. if GPU managed
 		 * to scribble something invalid, we don't want that to confuse
...
@@ -46,6 +46,7 @@ enum adreno_family {
 	ADRENO_6XX_GEN2, /* a640 family */
 	ADRENO_6XX_GEN3, /* a650 family */
 	ADRENO_6XX_GEN4, /* a660 family */
+	ADRENO_7XX_GEN1, /* a730 family */
 };
 
 #define ADRENO_QUIRK_TWO_PASS_USE_WFI		BIT(0)
@@ -391,7 +392,8 @@ static inline int adreno_is_a650_family(const struct adreno_gpu *gpu)
 {
 	if (WARN_ON_ONCE(!gpu->info))
 		return false;
-	return gpu->info->family >= ADRENO_6XX_GEN3;
+	return gpu->info->family == ADRENO_6XX_GEN3 ||
+	       gpu->info->family == ADRENO_6XX_GEN4;
 }
 
 static inline int adreno_is_a640_family(const struct adreno_gpu *gpu)
@@ -401,6 +403,12 @@ static inline int adreno_is_a640_family(const struct adreno_gpu *gpu)
 	return gpu->info->family == ADRENO_6XX_GEN2;
 }
 
+static inline int adreno_is_a7xx(struct adreno_gpu *gpu)
+{
+	/* Update with non-fake (i.e. non-A702) Gen 7 GPUs */
+	return gpu->info->family == ADRENO_7XX_GEN1;
+}
+
 u64 adreno_private_address_space_size(struct msm_gpu *gpu);
 int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 		     uint32_t param, uint64_t *value, uint32_t *len);
...
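Why adreno_is_a650_family() above switched from >= to explicit equality: with ADRENO_7XX_GEN1 appended to the enum, the old open-ended comparison would have silently matched A7xx as well. A tiny standalone illustration of the pitfall (hypothetical, with the enum trimmed to the relevant entries):

	#include <assert.h>
	#include <stdbool.h>

	enum adreno_family {
		ADRENO_6XX_GEN3, /* a650 family */
		ADRENO_6XX_GEN4, /* a660 family */
		ADRENO_7XX_GEN1, /* a730 family, appended later */
	};

	/* Old check: open-ended, matches anything appended after GEN3 */
	static bool is_a650_family_old(enum adreno_family f)
	{
		return f >= ADRENO_6XX_GEN3;
	}

	/* New check: only the two 6xx generations it actually means */
	static bool is_a650_family_new(enum adreno_family f)
	{
		return f == ADRENO_6XX_GEN3 || f == ADRENO_6XX_GEN4;
	}

	int main(void)
	{
		assert(is_a650_family_old(ADRENO_7XX_GEN1));  /* wrongly true */
		assert(!is_a650_family_new(ADRENO_7XX_GEN1)); /* correctly false */
		return 0;
	}

Call sites that genuinely want both behaviors now say so explicitly with an added adreno_is_a7xx() term, as seen throughout the GMU hunks above.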
@@ -30,6 +30,8 @@ struct msm_gpu_submit_stats {
 struct msm_rbmemptrs {
 	volatile uint32_t rptr;
 	volatile uint32_t fence;
+	/* Introduced on A7xx */
+	volatile uint32_t bv_fence;
 
 	volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
 	volatile u64 ttbr0;
...