Commit 6a036afb authored by Catalin Marinas

Merge branch 'for-next/neoverse-n1-stale-instr' into for-next/core

Neoverse-N1 cores with the 'COHERENT_ICACHE' feature may fetch stale
instructions when software depends on prefetch-speculation-protection
instead of explicit synchronization. [0]

The workaround is to trap I-Cache maintenance and issue an
inner-shareable TLBI. The affected cores have a Coherent I-Cache, so the
I-Cache maintenance isn't necessary. The core tells user-space it can
skip it with CTR_EL0.DIC. We also have to trap this register to hide the
bit, forcing DIC-aware user-space to perform the maintenance.

To avoid trapping all cache-maintenance, this workaround depends on
a firmware component that only traps I-cache maintenance from EL0 and
performs the workaround.

For user-space, the kernel's work is to trap CTR_EL0 to hide DIC, and
produce a fake IminLine. EL3 traps the now-necessary I-Cache maintenance
and performs the inner-shareable-TLBI that makes everything better.

[0] https://developer.arm.com/docs/sden885747/latest/arm-neoverse-n1-mp050-software-developer-errata-notice

* for-next/neoverse-n1-stale-instr:
  arm64: Silence clang warning on mismatched value/register sizes
  arm64: compat: Workaround Neoverse-N1 #1542419 for compat user-space
  arm64: Fake the IminLine size on systems affected by Neoverse-N1 #1542419
  arm64: errata: Hide CTR_EL0.DIC on systems affected by Neoverse-N1 #1542419
parents ba95e9bd 27a22fbd
...@@ -88,6 +88,8 @@ stable kernels. ...@@ -88,6 +88,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A | | ARM | Neoverse-N1 | #1349291 | N/A |
+----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A | | ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+
......
...@@ -558,6 +558,22 @@ config ARM64_ERRATUM_1463225 ...@@ -558,6 +558,22 @@ config ARM64_ERRATUM_1463225
If unsure, say Y. If unsure, say Y.
config ARM64_ERRATUM_1542419
bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
default y
help
This option adds a workaround for ARM Neoverse-N1 erratum
1542419.
Affected Neoverse-N1 cores could execute a stale instruction when
modified by another CPU. The workaround depends on a firmware
counterpart.
Work around the issue by hiding the DIC feature from EL0. This
forces user-space to perform cache maintenance.
If unsure, say Y.
config CAVIUM_ERRATUM_22375 config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313" bool "Cavium erratum 22375, 24313"
default y default y
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#define CTR_L1IP_MASK 3 #define CTR_L1IP_MASK 3
#define CTR_DMINLINE_SHIFT 16 #define CTR_DMINLINE_SHIFT 16
#define CTR_IMINLINE_SHIFT 0 #define CTR_IMINLINE_SHIFT 0
#define CTR_IMINLINE_MASK 0xf
#define CTR_ERG_SHIFT 20 #define CTR_ERG_SHIFT 20
#define CTR_CWG_SHIFT 24 #define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15 #define CTR_CWG_MASK 15
...@@ -18,7 +19,7 @@ ...@@ -18,7 +19,7 @@
#define CTR_DIC_SHIFT 29 #define CTR_DIC_SHIFT 29
#define CTR_CACHE_MINLINE_MASK \ #define CTR_CACHE_MINLINE_MASK \
(0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
......
...@@ -54,7 +54,8 @@ ...@@ -54,7 +54,8 @@
#define ARM64_WORKAROUND_1463225 44 #define ARM64_WORKAROUND_1463225 44
#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
#define ARM64_WORKAROUND_1542419 47
#define ARM64_NCAPS 47 #define ARM64_NCAPS 48
#endif /* __ASM_CPUCAPS_H */ #endif /* __ASM_CPUCAPS_H */
...@@ -88,13 +88,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, ...@@ -88,13 +88,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
} }
static void static void
cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
{ {
u64 mask = arm64_ftr_reg_ctrel0.strict_mask; u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
bool enable_uct_trap = false;
/* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */
if ((read_cpuid_cachetype() & mask) != if ((read_cpuid_cachetype() & mask) !=
(arm64_ftr_reg_ctrel0.sys_val & mask)) (arm64_ftr_reg_ctrel0.sys_val & mask))
enable_uct_trap = true;
/* ... or if the system is affected by an erratum */
if (cap->capability == ARM64_WORKAROUND_1542419)
enable_uct_trap = true;
if (enable_uct_trap)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
} }
...@@ -648,6 +656,18 @@ needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry, ...@@ -648,6 +656,18 @@ needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
return false; return false;
} }
/*
 * Match callback for Neoverse-N1 erratum 1542419.
 *
 * @entry: capability entry being evaluated (not consulted here).
 * @scope: detection scope; anything other than SCOPE_LOCAL_CPU triggers
 *         a warning, as does being called from preemptible context.
 *
 * Returns true only when the calling CPU's MIDR identifies a Neoverse-N1
 * (any version) and the DIC bit is set in its cache type register.
 */
static bool __maybe_unused
has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
				int scope)
{
	const struct midr_range n1_range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
	u32 cpu_midr = read_cpuid_id();
	bool dic_set = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);

	/* Both reads above are per-CPU, so the caller must be pinned. */
	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());

	if (!is_midr_in_range(cpu_midr, &n1_range))
		return false;

	return dic_set;
}
#ifdef CONFIG_HARDEN_EL2_VECTORS #ifdef CONFIG_HARDEN_EL2_VECTORS
static const struct midr_range arm64_harden_el2_vectors[] = { static const struct midr_range arm64_harden_el2_vectors[] = {
...@@ -889,6 +909,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ...@@ -889,6 +909,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM, .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM,
ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
}, },
#endif
#ifdef CONFIG_ARM64_ERRATUM_1542419
{
/* we depend on the firmware portion for correctness */
.desc = "ARM erratum 1542419 (kernel portion)",
.capability = ARM64_WORKAROUND_1542419,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_neoverse_n1_erratum_1542419,
.cpu_enable = cpu_enable_trap_ctr_access,
},
#endif #endif
{ {
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
*/ */
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/cpufeature.h>
#include <linux/personality.h> #include <linux/personality.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
...@@ -17,6 +18,7 @@ ...@@ -17,6 +18,7 @@
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/system_misc.h> #include <asm/system_misc.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h> #include <asm/unistd.h>
static long static long
...@@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) ...@@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
return 0; return 0;
if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
/*
* The workaround requires an inner-shareable tlbi.
* We pick the reserved-ASID to minimise the impact.
*/
__tlbi(aside1is, __TLBI_VADDR(0, 0));
dsb(ish);
}
ret = __flush_cache_user_range(start, start + chunk); ret = __flush_cache_user_range(start, start + chunk);
if (ret) if (ret)
return ret; return ret;
......
...@@ -470,6 +470,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) ...@@ -470,6 +470,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
int rt = ESR_ELx_SYS64_ISS_RT(esr); int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
/* Hide DIC so that we can trap the unnecessary maintenance...*/
val &= ~BIT(CTR_DIC_SHIFT);
/* ... and fake IminLine to reduce the number of traps. */
val &= ~CTR_IMINLINE_MASK;
val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
}
pt_regs_write_reg(regs, rt, val); pt_regs_write_reg(regs, rt, val);
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment