Commit addfc386 authored by Andrew Murray's avatar Andrew Murray Committed by Will Deacon

arm64: atomics: avoid out-of-line ll/sc atomics

When building for LSE atomics (CONFIG_ARM64_LSE_ATOMICS), if the hardware
or toolchain doesn't support it the existing code will fallback to ll/sc
atomics. It achieves this by branching from inline assembly to a function
that is built with special compile flags. Further this results in the
clobbering of registers even when the fallback isn't used increasing
register pressure.

Improve this by providing inline implementations of both LSE and
ll/sc and use a static key to select between them, which allows for the
compiler to generate better atomics code. Put the LL/SC fallback atomics
in their own subsection to improve icache performance.
Signed-off-by: default avatarAndrew Murray <andrew.murray@arm.com>
Signed-off-by: default avatarWill Deacon <will@kernel.org>
parent 580fa1b8
......@@ -17,16 +17,7 @@
#ifdef __KERNEL__
#define __ARM64_IN_ATOMIC_IMPL
#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
#include <asm/atomic_lse.h>
#else
#include <asm/atomic_ll_sc.h>
#endif
#undef __ARM64_IN_ATOMIC_IMPL
#include <asm/atomic_arch.h>
#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Selection between LSE and LL/SC atomics.
*
* Copyright (C) 2018 ARM Ltd.
* Author: Andrew Murray <andrew.murray@arm.com>
*/
#ifndef __ASM_ATOMIC_ARCH_H
#define __ASM_ATOMIC_ARCH_H
#include <linux/jump_label.h>
#include <asm/cpucaps.h>
#include <asm/atomic_ll_sc.h>
#include <asm/atomic_lse.h>
extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
extern struct static_key_false arm64_const_caps_ready;
static inline bool system_uses_lse_atomics(void)
{
return (IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) &&
IS_ENABLED(CONFIG_AS_LSE) &&
static_branch_likely(&arm64_const_caps_ready)) &&
static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
}
#define __lse_ll_sc_body(op, ...) \
({ \
system_uses_lse_atomics() ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
#define ATOMIC_OP(op) \
static inline void arch_##op(int i, atomic_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
ATOMIC_OP(atomic_andnot)
ATOMIC_OP(atomic_or)
ATOMIC_OP(atomic_xor)
ATOMIC_OP(atomic_add)
ATOMIC_OP(atomic_and)
ATOMIC_OP(atomic_sub)
#define ATOMIC_FETCH_OP(name, op) \
static inline int arch_##op##name(int i, atomic_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
#define ATOMIC_FETCH_OPS(op) \
ATOMIC_FETCH_OP(_relaxed, op) \
ATOMIC_FETCH_OP(_acquire, op) \
ATOMIC_FETCH_OP(_release, op) \
ATOMIC_FETCH_OP( , op)
ATOMIC_FETCH_OPS(atomic_fetch_andnot)
ATOMIC_FETCH_OPS(atomic_fetch_or)
ATOMIC_FETCH_OPS(atomic_fetch_xor)
ATOMIC_FETCH_OPS(atomic_fetch_add)
ATOMIC_FETCH_OPS(atomic_fetch_and)
ATOMIC_FETCH_OPS(atomic_fetch_sub)
ATOMIC_FETCH_OPS(atomic_add_return)
ATOMIC_FETCH_OPS(atomic_sub_return)
#define ATOMIC64_OP(op) \
static inline void arch_##op(long i, atomic64_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
ATOMIC64_OP(atomic64_andnot)
ATOMIC64_OP(atomic64_or)
ATOMIC64_OP(atomic64_xor)
ATOMIC64_OP(atomic64_add)
ATOMIC64_OP(atomic64_and)
ATOMIC64_OP(atomic64_sub)
#define ATOMIC64_FETCH_OP(name, op) \
static inline long arch_##op##name(long i, atomic64_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
#define ATOMIC64_FETCH_OPS(op) \
ATOMIC64_FETCH_OP(_relaxed, op) \
ATOMIC64_FETCH_OP(_acquire, op) \
ATOMIC64_FETCH_OP(_release, op) \
ATOMIC64_FETCH_OP( , op)
ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
ATOMIC64_FETCH_OPS(atomic64_fetch_or)
ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
ATOMIC64_FETCH_OPS(atomic64_fetch_add)
ATOMIC64_FETCH_OPS(atomic64_fetch_and)
ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
ATOMIC64_FETCH_OPS(atomic64_add_return)
ATOMIC64_FETCH_OPS(atomic64_sub_return)
static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}
#define __CMPXCHG_CASE(name, sz) \
static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
return __lse_ll_sc_body(_cmpxchg_case_##name##sz, \
ptr, old, new); \
}
__CMPXCHG_CASE( , 8)
__CMPXCHG_CASE( , 16)
__CMPXCHG_CASE( , 32)
__CMPXCHG_CASE( , 64)
__CMPXCHG_CASE(acq_, 8)
__CMPXCHG_CASE(acq_, 16)
__CMPXCHG_CASE(acq_, 32)
__CMPXCHG_CASE(acq_, 64)
__CMPXCHG_CASE(rel_, 8)
__CMPXCHG_CASE(rel_, 16)
__CMPXCHG_CASE(rel_, 32)
__CMPXCHG_CASE(rel_, 64)
__CMPXCHG_CASE(mb_, 8)
__CMPXCHG_CASE(mb_, 16)
__CMPXCHG_CASE(mb_, 32)
__CMPXCHG_CASE(mb_, 64)
#define __CMPXCHG_DBL(name) \
static inline long __cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
volatile void *ptr) \
{ \
return __lse_ll_sc_body(_cmpxchg_double##name, \
old1, old2, new1, new2, ptr); \
}
__CMPXCHG_DBL( )
__CMPXCHG_DBL(_mb)
#endif /* __ASM_ATOMIC_LSE_H */
......@@ -10,83 +10,86 @@
#ifndef __ASM_ATOMIC_LL_SC_H
#define __ASM_ATOMIC_LL_SC_H
#ifndef __ARM64_IN_ATOMIC_IMPL
#error "please don't include this file directly"
#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
#define __LL_SC_FALLBACK(asm_ops) \
" b 3f\n" \
" .subsection 1\n" \
"3:\n" \
asm_ops "\n" \
" b 4f\n" \
" .previous\n" \
"4:\n"
#else
#define __LL_SC_FALLBACK(asm_ops) asm_ops
#endif
/*
* AArch64 UP and SMP safe atomic ops. We use load exclusive and
* store exclusive to ensure that these are atomic. We may loop
* to ensure that the update happens.
*
* NOTE: these functions do *not* follow the PCS and must explicitly
* save any clobbered registers other than x0 (regardless of return
* value). This is achieved through -fcall-saved-* compiler flags for
* this file, which unfortunately don't work on a per-function basis
* (the optimize attribute silently ignores these options).
*/
#define ATOMIC_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \
__LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v)) \
static inline void \
__ll_sc_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" stxr %w1, %w0, %2\n" \
" cbnz %w1, 1b" \
" cbnz %w1, 1b\n") \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i)); \
} \
__LL_SC_EXPORT(arch_atomic_##op);
}
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE int \
__LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v)) \
static inline int \
__ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "_return" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" st" #rel "xr %w1, %w0, %2\n" \
" cbnz %w1, 1b\n" \
" " #mb \
" " #mb ) \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic_##op##_return##name);
}
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
__LL_SC_INLINE int \
__LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v)) \
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
static inline int \
__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int val, result; \
\
asm volatile("// atomic_fetch_" #op #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %w0, %3\n" \
" " #asm_op " %w1, %w0, %w4\n" \
" st" #rel "xr %w2, %w1, %3\n" \
" cbnz %w2, 1b\n" \
" " #mb \
" " #mb ) \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic_fetch_##op##name);
}
#define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \
......@@ -121,66 +124,66 @@ ATOMIC_OPS(xor, eor, )
#undef ATOMIC_OP
#define ATOMIC64_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \
__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
static inline void \
__ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" stxr %w1, %0, %2\n" \
" cbnz %w1, 1b" \
" cbnz %w1, 1b") \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i)); \
} \
__LL_SC_EXPORT(arch_atomic64_##op);
}
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \
__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
static inline long \
__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" st" #rel "xr %w1, %0, %2\n" \
" cbnz %w1, 1b\n" \
" " #mb \
" " #mb ) \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
}
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \
__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
static inline long \
__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
s64 result, val; \
unsigned long tmp; \
\
asm volatile("// atomic64_fetch_" #op #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %0, %3\n" \
" " #asm_op " %1, %0, %4\n" \
" st" #rel "xr %w2, %1, %3\n" \
" cbnz %w2, 1b\n" \
" " #mb \
" " #mb ) \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: #constraint "r" (i) \
: cl); \
\
return result; \
} \
__LL_SC_EXPORT(arch_atomic64_fetch_##op##name);
}
#define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \
......@@ -214,13 +217,14 @@ ATOMIC64_OPS(xor, eor, L)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
__LL_SC_INLINE s64
__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
static inline s64
__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
{
s64 result;
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
__LL_SC_FALLBACK(
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" subs %0, %0, #1\n"
......@@ -228,20 +232,19 @@ __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
" dmb ish\n"
"2:"
"2:")
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
:
: "cc", "memory");
return result;
}
__LL_SC_EXPORT(arch_atomic64_dec_if_positive);
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
__LL_SC_INLINE u##sz \
__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
static inline u##sz \
__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
unsigned long old, \
u##sz new)) \
u##sz new) \
{ \
unsigned long tmp; \
u##sz oldval; \
......@@ -255,6 +258,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
old = (u##sz)old; \
\
asm volatile( \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %[v]\n" \
"1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
......@@ -262,15 +266,14 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
" st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
"2:" \
"2:") \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
[v] "+Q" (*(u##sz *)ptr) \
: [old] #constraint "r" (old), [new] "r" (new) \
: cl); \
\
return oldval; \
} \
__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
}
/*
* Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
......@@ -297,16 +300,17 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, rel, cl) \
__LL_SC_INLINE long \
__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
static inline long \
__ll_sc__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
volatile void *ptr)) \
volatile void *ptr) \
{ \
unsigned long tmp, ret; \
\
asm volatile("// __cmpxchg_double" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \
"1: ldxp %0, %1, %2\n" \
" eor %0, %0, %3\n" \
......@@ -316,14 +320,13 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
" st" #rel "xp %w0, %5, %6, %2\n" \
" cbnz %w0, 1b\n" \
" " #mb "\n" \
"2:" \
"2:") \
: "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
: "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
: cl); \
\
return ret; \
} \
__LL_SC_EXPORT(__cmpxchg_double##name);
}
__CMPXCHG_DBL( , , , )
__CMPXCHG_DBL(_mb, dmb ish, l, "memory")
......
This diff is collapsed.
......@@ -10,7 +10,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <asm/atomic.h>
#include <asm/atomic_arch.h>
#include <asm/barrier.h>
#include <asm/lse.h>
......
......@@ -22,14 +22,6 @@
__asm__(".arch_extension lse");
/* Move the ll/sc atomics out-of-line */
#define __LL_SC_INLINE notrace
#define __LL_SC_PREFIX(x) __ll_sc_##x
#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x))
/* Macro for constructing calls to out-of-line ll/sc atomics */
#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
#define __LL_SC_CLOBBERS "x16", "x17", "x30"
/* In-line patching at runtime */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
......@@ -46,9 +38,6 @@ __asm__(".arch_extension lse");
#else /* __ASSEMBLER__ */
#define __LL_SC_INLINE static inline
#define __LL_SC_PREFIX(x) x
#define __LL_SC_EXPORT(x)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment