Commit f8a6e48c authored by Linus Torvalds

Merge local branch 'x86-codegen'

Merge trivial x86 code generation annoyances

 - Introduce helper macros for clang asm input problems

 - use said macros to improve trivially stupid code generation issues in
   bitops and array_index_mask_nospec

 - also improve codegen with 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
  x86: improve bitop code generation with clang
  x86: improve array_index_mask_nospec() code generation
  clang: work around asm input constraint problems
parents 5f16eb05 b9b60b31
...@@ -33,20 +33,16 @@ ...@@ -33,20 +33,16 @@
* Returns: * Returns:
* 0 - (index < size) * 0 - (index < size)
*/ */
static __always_inline unsigned long array_index_mask_nospec(unsigned long index, #define array_index_mask_nospec(idx,sz) ({ \
unsigned long size) typeof((idx)+(sz)) __idx = (idx); \
{ typeof(__idx) __sz = (sz); \
unsigned long mask; unsigned long __mask; \
asm volatile ("cmp %1,%2; sbb %0,%0" \
asm volatile ("cmp %1,%2; sbb %0,%0;" :"=r" (__mask) \
:"=r" (mask) :ASM_INPUT_G (__sz), \
:"g"(size),"r" (index) "r" (__idx) \
:"cc"); :"cc"); \
return mask; __mask; })
}
/* Override the default implementation from linux/nospec.h. */
#define array_index_mask_nospec array_index_mask_nospec
/* Prevent speculative execution past this barrier. */ /* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) #define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
......
...@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) ...@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
{ {
asm("rep; bsf %1,%0" asm("rep; bsf %1,%0"
: "=r" (word) : "=r" (word)
: "rm" (word)); : ASM_INPUT_RM (word));
return word; return word;
} }
...@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word) ...@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
asm("bsr %1,%0" asm("bsr %1,%0"
: "=r" (word) : "=r" (word)
: "rm" (word)); : ASM_INPUT_RM (word));
return word; return word;
} }
...@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x) ...@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
*/ */
asm("bsfl %1,%0" asm("bsfl %1,%0"
: "=r" (r) : "=r" (r)
: "rm" (x), "0" (-1)); : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV) #elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t" asm("bsfl %1,%0\n\t"
"cmovzl %2,%0" "cmovzl %2,%0"
...@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x) ...@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
*/ */
asm("bsrl %1,%0" asm("bsrl %1,%0"
: "=r" (r) : "=r" (r)
: "rm" (x), "0" (-1)); : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV) #elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t" asm("bsrl %1,%0\n\t"
"cmovzl %2,%0" "cmovzl %2,%0"
...@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x) ...@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
*/ */
asm("bsrq %1,%q0" asm("bsrq %1,%q0"
: "+r" (bitpos) : "+r" (bitpos)
: "rm" (x)); : ASM_INPUT_RM (x));
return bitpos + 1; return bitpos + 1;
} }
#else #else
......
...@@ -118,3 +118,13 @@ ...@@ -118,3 +118,13 @@
#define __diag_ignore_all(option, comment) \ #define __diag_ignore_all(option, comment) \
__diag_clang(13, ignore, option) __diag_clang(13, ignore, option)
/*
 * clang has horrible behavior with "g" or "rm" constraints for asm
 * inputs, turning them into something worse than "m". Avoid using
 * constraints with multiple possible uses (but "ir" seems to be ok):
 *
 * https://github.com/llvm/llvm-project/issues/20571
 *
 * ASM_INPUT_G is used where an asm input would otherwise be written
 * with the "g" constraint, and ASM_INPUT_RM where it would be written
 * with "rm". Here they are narrowed to "ir" and "r" respectively, so
 * that each input constraint has no memory alternative.
 */
#define ASM_INPUT_G "ir"
#define ASM_INPUT_RM "r"
...@@ -409,6 +409,15 @@ struct ftrace_likely_data { ...@@ -409,6 +409,15 @@ struct ftrace_likely_data {
#define asm_goto_output(x...) asm volatile goto(x) #define asm_goto_output(x...) asm volatile goto(x)
#endif #endif
/*
 * Clang has trouble with constraints with multiple
 * alternative behaviors (mainly "g" and "rm").
 *
 * Fallback definitions: when ASM_INPUT_G has not already been defined,
 * the macros expand to the plain "g" and "rm" constraints. Note that
 * the single ASM_INPUT_G guard covers both macros, so whoever defines
 * ASM_INPUT_G beforehand must define ASM_INPUT_RM as well.
 */
#ifndef ASM_INPUT_G
#define ASM_INPUT_G "g"
#define ASM_INPUT_RM "rm"
#endif
#ifdef CONFIG_CC_HAS_ASM_INLINE #ifdef CONFIG_CC_HAS_ASM_INLINE
#define asm_inline asm __inline #define asm_inline asm __inline
#else #else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment