Commit e76f69b9 authored by Linus Torvalds

Merge tag 'x86-percpu-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 percpu updates from Ingo Molnar:

 - Expand the named address spaces optimizations down to
   GCC 9.1+.

 - Re-enable named address spaces with sanitizers for GCC 13.3+

 - Generate better this_percpu_xchg_op() code

 - Introduce raw_cpu_read_long() to reduce ifdeffery

 - Simplify the x86_this_cpu_test_bit() et al macros

 - Address Sparse warnings

 - Misc cleanups & fixes
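
For readers unfamiliar with the "named address spaces" items above: with CC_HAS_NAMED_AS, a %gs-relative per-CPU access can be written as an ordinary C load through GCC's __seg_gs address space instead of an opaque asm() statement, so the optimizer can schedule and combine it like any other memory reference. The stand-alone user-space sketch below only illustrates that difference; the variable demo_counter and both helpers are hypothetical, not kernel code, and running it relies on the default zero GS base of an x86-64 Linux process (build with GCC 9.1+ for x86-64).

#include <stdio.h>

unsigned long demo_counter = 42;	/* stand-in for a per-CPU variable */

/* Named-address-space style (CONFIG_USE_X86_SEG_SUPPORT=y): a plain C load
 * through the __seg_gs address space; the compiler emits the %gs-prefixed
 * mov itself and can optimize around it. */
static unsigned long read_named_as(void)
{
	__seg_gs unsigned long *p =
		(__seg_gs unsigned long *)(unsigned long)&demo_counter;

	return *p;
}

/* Fallback style: the same load wrapped in inline asm, which the optimizer
 * has to treat as a black box. */
static unsigned long read_via_asm(void)
{
	unsigned long val;

	asm ("mov %%gs:%1, %0" : "=r" (val) : "m" (demo_counter));
	return val;
}

int main(void)
{
	printf("%lu %lu\n", read_named_as(), read_via_asm());	/* 42 42 */
	return 0;
}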

* tag 'x86-percpu-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/percpu: Introduce raw_cpu_read_long() to reduce ifdeffery
  x86/percpu: Rewrite x86_this_cpu_test_bit() and friends as macros
  x86/percpu: Fix x86_this_cpu_variable_test_bit() asm template
  x86/percpu: Re-enable named address spaces with sanitizers for GCC 13.3+
  x86/percpu: Use __force to cast from __percpu address space
  x86/percpu: Do not use this_cpu_read_stable_8() for 32-bit targets
  x86/percpu: Unify arch_raw_cpu_ptr() defines
  x86/percpu: Enable named address spaces for GCC 9.1+
  x86/percpu: Re-enable named address spaces with KASAN for GCC 13.3+
  x86/percpu: Move raw_percpu_xchg_op() to a better place
  x86/percpu: Convert this_percpu_xchg_op() from asm() to C code, to generate better code
parents eabb6297 93cfa544
@@ -38,8 +38,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define this_cpu_has(bit)						\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
-	 x86_this_cpu_test_bit(bit,					\
-		(unsigned long __percpu *)&cpu_info.x86_capability))
+	 x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
 
 /*
  * This macro is for detection of features which need kernel
...
@@ -2418,18 +2418,20 @@ source "kernel/livepatch/Kconfig"
 endmenu
 
 config CC_HAS_NAMED_AS
-	def_bool CC_IS_GCC && GCC_VERSION >= 120100
+	def_bool CC_IS_GCC && GCC_VERSION >= 90100
+
+config CC_HAS_NAMED_AS_FIXED_SANITIZERS
+	def_bool CC_IS_GCC && GCC_VERSION >= 130300
 
 config USE_X86_SEG_SUPPORT
 	def_bool y
 	depends on CC_HAS_NAMED_AS
 	#
-	# -fsanitize=kernel-address (KASAN) is at the moment incompatible
-	# with named address spaces - see GCC PR sanitizer/111736.
+	# -fsanitize=kernel-address (KASAN) and -fsanitize=thread
+	# (KCSAN) are incompatible with named address spaces with
+	# GCC < 13.3 - see GCC PR sanitizer/111736.
 	#
-	depends on !KASAN
-	# -fsanitize=thread (KCSAN) is also incompatible.
-	depends on !KCSAN
+	depends on !(KASAN || KCSAN) || CC_HAS_NAMED_AS_FIXED_SANITIZERS
 
 config CC_HAS_SLS
 	def_bool $(cc-option,-mharden-sls=all)
...
@@ -129,8 +129,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define this_cpu_has(bit)						\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
-	 x86_this_cpu_test_bit(bit,					\
-		(unsigned long __percpu *)&cpu_info.x86_capability))
+	 x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
 
 /*
  * This macro is for detection of features which need kernel
...
@@ -59,36 +59,24 @@
 #define __force_percpu_prefix	"%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset		this_cpu_read(this_cpu_off)
 
-#ifdef CONFIG_USE_X86_SEG_SUPPORT
-/*
- * Efficient implementation for cases in which the compiler supports
- * named address spaces.  Allows the compiler to perform additional
- * optimizations that can save more instructions.
- */
-#define arch_raw_cpu_ptr(ptr)					\
-({								\
-	unsigned long tcp_ptr__;				\
-	tcp_ptr__ = __raw_cpu_read(, this_cpu_off);		\
-								\
-	tcp_ptr__ += (unsigned long)(ptr);			\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;		\
-})
-#else /* CONFIG_USE_X86_SEG_SUPPORT */
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
+ *
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becames 4.
  */
-#define arch_raw_cpu_ptr(ptr)					\
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr)						\
 ({								\
-	unsigned long tcp_ptr__;				\
-	asm ("mov " __percpu_arg(1) ", %0"			\
-	     : "=r" (tcp_ptr__)					\
-	     : "m" (__my_cpu_var(this_cpu_off)));		\
-								\
-	tcp_ptr__ += (unsigned long)(ptr);			\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;		\
+	unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off);	\
+									\
+	tcp_ptr__ += (__force unsigned long)(_ptr);			\
+	(typeof(*(_ptr)) __kernel __force *)tcp_ptr__;			\
 })
-#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+#else
+#define arch_raw_cpu_ptr(_ptr)	({ BUILD_BUG(); (typeof(_ptr))0; })
+#endif
 
 #define PER_CPU_VAR(var)	%__percpu_seg:(var)__percpu_rel
@@ -102,8 +90,8 @@
 #endif /* CONFIG_SMP */
 
 #define __my_cpu_type(var)	typeof(var) __percpu_seg_override
-#define __my_cpu_ptr(ptr)	(__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
-#define __my_cpu_var(var)	(*__my_cpu_ptr(&var))
+#define __my_cpu_ptr(ptr)	(__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
+#define __my_cpu_var(var)	(*__my_cpu_ptr(&(var)))
 #define __percpu_arg(x)		__percpu_prefix "%" #x
 #define __force_percpu_arg(x)	__force_percpu_prefix "%" #x
@@ -230,25 +218,26 @@ do { \
 })
 
 /*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix.  The processor cannot prefetch
- * cachelines if xchg is used.
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
  */
-#define percpu_xchg_op(size, qual, _var, _nval)				\
+#define raw_percpu_xchg_op(_var, _nval)					\
 ({									\
-	__pcpu_type_##size pxo_old__;					\
-	__pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);	\
-	asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]),		\
-				    "%[oval]")				\
-		  "\n1:\t"						\
-		  __pcpu_op2_##size("cmpxchg", "%[nval]",		\
-				    __percpu_arg([var]))		\
-		  "\n\tjnz 1b"						\
-		  : [oval] "=&a" (pxo_old__),				\
-		    [var] "+m" (__my_cpu_var(_var))			\
-		  : [nval] __pcpu_reg_##size(, pxo_new__)		\
-		  : "memory");						\
-	(typeof(_var))(unsigned long) pxo_old__;			\
+	typeof(_var) pxo_old__ = raw_cpu_read(_var);			\
+	raw_cpu_write(_var, _nval);					\
+	pxo_old__;							\
+})
+
+/*
+ * this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
+ * xchg is expensive due to the implied lock prefix.  The processor
+ * cannot prefetch cachelines if xchg is used.
+ */
+#define this_percpu_xchg_op(_var, _nval)				\
+({									\
+	typeof(_var) pxo_old__ = this_cpu_read(_var);			\
+	do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval));	\
+	pxo_old__;							\
 })
 
 /*
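
To spell out the reasoning in the new this_percpu_xchg_op() comment above: a plain cmpxchg (without the lock prefix) is still a single instruction, so it cannot be torn by an interrupt on the local CPU, and per-CPU data is never written by other CPUs, which is why the implicitly-locked xchg can be avoided. The helper below is a hypothetical, stand-alone user-space model of that pattern (x86-64, GCC inline asm), not the kernel macro; it is deliberately not safe for data shared between CPUs.

#include <stdio.h>

/* Exchange *ptr for nval with an unlocked cmpxchg loop - the shape of code
 * that this_percpu_xchg_op() lets the compiler generate from plain C. */
static unsigned long local_xchg(unsigned long *ptr, unsigned long nval)
{
	unsigned long old = *ptr;
	unsigned long prev;

	do {
		prev = old;
		/* cmpxchg: if (*ptr == %rax) *ptr = nval, else %rax = *ptr */
		asm volatile("cmpxchg %2, %1"
			     : "+a" (old), "+m" (*ptr)
			     : "r" (nval)
			     : "memory");
	} while (old != prev);	/* value changed under us, retry */

	return old;
}

int main(void)
{
	unsigned long v = 1;
	unsigned long old = local_xchg(&v, 5);

	printf("old=%lu new=%lu\n", old, v);	/* prints old=1 new=5 */
	return 0;
}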
@@ -428,10 +417,6 @@ do { \
  * actually per-thread variables implemented as per-CPU variables and
  * thus stable for the duration of the respective task.
  */
-#define this_cpu_read_stable_1(pcp)	percpu_stable_op(1, "mov", pcp)
-#define this_cpu_read_stable_2(pcp)	percpu_stable_op(2, "mov", pcp)
-#define this_cpu_read_stable_4(pcp)	percpu_stable_op(4, "mov", pcp)
-#define this_cpu_read_stable_8(pcp)	percpu_stable_op(8, "mov", pcp)
 #define this_cpu_read_stable(pcp)	__pcpu_size_call_return(this_cpu_read_stable_, pcp)
 
 #ifdef CONFIG_USE_X86_SEG_SUPPORT
@@ -500,6 +485,10 @@ do { \
 #define this_cpu_read_const(pcp)	({ BUILD_BUG(); (typeof(pcp))0; })
 #endif /* CONFIG_USE_X86_SEG_SUPPORT */
 
+#define this_cpu_read_stable_1(pcp)	percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp)	percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp)	percpu_stable_op(4, "mov", pcp)
+
 #define raw_cpu_add_1(pcp, val)		percpu_add_op(1, , (pcp), val)
 #define raw_cpu_add_2(pcp, val)		percpu_add_op(2, , (pcp), val)
 #define raw_cpu_add_4(pcp, val)		percpu_add_op(4, , (pcp), val)
@@ -509,18 +498,6 @@ do { \
 #define raw_cpu_or_1(pcp, val)		percpu_to_op(1, , "or", (pcp), val)
 #define raw_cpu_or_2(pcp, val)		percpu_to_op(2, , "or", (pcp), val)
 #define raw_cpu_or_4(pcp, val)		percpu_to_op(4, , "or", (pcp), val)
-
-/*
- * raw_cpu_xchg() can use a load-store since it is not required to be
- * IRQ-safe.
- */
-#define raw_percpu_xchg_op(var, nval)					\
-({									\
-	typeof(var) pxo_ret__ = raw_cpu_read(var);			\
-	raw_cpu_write(var, (nval));					\
-	pxo_ret__;							\
-})
-
 #define raw_cpu_xchg_1(pcp, val)	raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_2(pcp, val)	raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)	raw_percpu_xchg_op(pcp, val)
@@ -534,9 +511,9 @@ do { \
 #define this_cpu_or_1(pcp, val)		percpu_to_op(1, volatile, "or", (pcp), val)
 #define this_cpu_or_2(pcp, val)		percpu_to_op(2, volatile, "or", (pcp), val)
 #define this_cpu_or_4(pcp, val)		percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(4, volatile, pcp, nval)
+#define this_cpu_xchg_1(pcp, nval)	this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)	this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)	this_percpu_xchg_op(pcp, nval)
 
 #define raw_cpu_add_return_1(pcp, val)	percpu_add_return_op(1, , pcp, val)
 #define raw_cpu_add_return_2(pcp, val)	percpu_add_return_op(2, , pcp, val)
@@ -563,6 +540,8 @@ do { \
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
+#define this_cpu_read_stable_8(pcp)	percpu_stable_op(8, "mov", pcp)
+
 #define raw_cpu_add_8(pcp, val)		percpu_add_op(8, , (pcp), val)
 #define raw_cpu_and_8(pcp, val)		percpu_to_op(8, , "and", (pcp), val)
 #define raw_cpu_or_8(pcp, val)		percpu_to_op(8, , "or", (pcp), val)
@@ -575,41 +554,41 @@
 #define this_cpu_and_8(pcp, val)		percpu_to_op(8, volatile, "and", (pcp), val)
 #define this_cpu_or_8(pcp, val)			percpu_to_op(8, volatile, "or", (pcp), val)
 #define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)		this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval)	percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
-#endif
 
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
-                        const unsigned long __percpu *addr)
-{
-	unsigned long __percpu *a =
-		(unsigned long __percpu *)addr + nr / BITS_PER_LONG;
+#define raw_cpu_read_long(pcp)		raw_cpu_read_8(pcp)
 
-#ifdef CONFIG_X86_64
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
 #else
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
+/* There is no generic 64 bit read stable operation for 32 bit targets. */
+#define this_cpu_read_stable_8(pcp)	({ BUILD_BUG(); (typeof(pcp))0; })
 
-static inline bool x86_this_cpu_variable_test_bit(int nr,
-                        const unsigned long __percpu *addr)
-{
-	bool oldbit;
+#define raw_cpu_read_long(pcp)		raw_cpu_read_4(pcp)
 
-	asm volatile("btl "__percpu_arg(2)",%1"
-			CC_SET(c)
-			: CC_OUT(c) (oldbit)
-			: "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
+#endif
 
-	return oldbit;
-}
+#define x86_this_cpu_constant_test_bit(_nr, _var)			\
+({									\
+	unsigned long __percpu *addr__ =				\
+		(unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+	!!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__));	\
+})
+
+#define x86_this_cpu_variable_test_bit(_nr, _var)			\
+({									\
+	bool oldbit;							\
+									\
+	asm volatile("btl %[nr], " __percpu_arg([var])			\
+		     CC_SET(c)						\
+		     : CC_OUT(c) (oldbit)				\
+		     : [var] "m" (__my_cpu_var(_var)),			\
+		       [nr] "rI" (_nr));				\
+	oldbit;								\
+})
 
-#define x86_this_cpu_test_bit(nr, addr)			\
-	(__builtin_constant_p((nr))			\
-	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
-	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+#define x86_this_cpu_test_bit(_nr, _var)				\
+	(__builtin_constant_p(_nr)					\
+	 ? x86_this_cpu_constant_test_bit(_nr, _var)			\
+	 : x86_this_cpu_variable_test_bit(_nr, _var))
 
 #include <asm-generic/percpu.h>
...