Commit 8ad7e8f6 authored by Thomas Gleixner

x86/fpu/xsave: Support XSAVEC in the kernel

XSAVEC is the user space counterpart of XSAVES; unlike XSAVES it cannot save
supervisor state. In virtualization scenarios the hypervisor often exposes
XSAVEC but not XSAVES to the guest, yet so far the kernel makes no use of it.

That's unfortunate, because XSAVEC uses the compacted XSTATE format, which is
more storage-efficient than XSAVE[OPT]: it does not leave holes for XSTATE
components which are available in hardware but not supported or enabled by
the kernel. There is room for further optimization when XSAVEC/S and XGETBV1
are supported.
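To illustrate the layout difference (a minimal sketch, not part of this
patch): in compacted format a component's offset is the running sum of the
sizes of the enabled lower-numbered components, while the standard format
uses fixed, CPUID-enumerated offsets and leaves holes for disabled
components. The helper and the sizes[] table below are hypothetical, and the
optional 64-byte component alignment is ignored:

/*
 * Sketch: offset of @xfeature in a compacted XSTATE buffer. FP and SSE
 * always live in the legacy FXSAVE area, so the walk starts at component 2.
 */
static unsigned int compacted_offset(u64 xcomp_bv, int xfeature,
                                     const unsigned int *sizes)
{
        unsigned int offset = 512 + 64; /* FXSAVE area + XSTATE header */
        int i;

        for (i = 2; i < xfeature; i++) {
                if (xcomp_bv & (1ULL << i))
                        offset += sizes[i];
        }
        return offset;
}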

In order to support XSAVEC:

 - Define the XSAVEC ASM macro, as the instruction is not yet supported by
   the minimum required toolchain.

 - Create a software defined X86_FEATURE_XCOMPACTED to select the compacted
   XSTATE buffer format for both XSAVEC and XSAVES.

 - Make XSAVEC an option in the 'XSAVE' ASM alternatives (see the
   enumeration sketch below).
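For reference (not part of this patch), XSAVEC availability is enumerated in
CPUID leaf 0xD, sub-leaf 1. A minimal user space sketch using GCC's cpuid.h;
the helper name is hypothetical:

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/* CPUID.(EAX=0DH,ECX=1):EAX enumerates the XSAVE instruction variants. */
static bool has_xsavec(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(0xd, 1, &eax, &ebx, &ecx, &edx))
                return false;
        /* bit 0: XSAVEOPT, bit 1: XSAVEC, bit 2: XGETBV1, bit 3: XSAVES */
        return eax & (1u << 1);
}

int main(void)
{
        printf("XSAVEC %savailable\n", has_xsavec() ? "" : "not ");
        return 0;
}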
Requested-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220404104820.598704095@linutronix.de
parent af2d861d
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,7 +201,7 @@
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE!				( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED		( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_LFENCE	( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
	 * Non-compacted format and legacy features use the cached fixed
	 * offsets.
	 */
-	if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
+	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
+	    xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
@@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void)
	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
-	 * works with XSAVE, but not with XSAVEOPT and XSAVES because
+	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
-	 * data when XSAVES is available because XSAVES uses xstate
+	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
@@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
  */
 static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
 {
-	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
+	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;
@@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
-		if (!compacted && xfeature_is_supervisor(i)) {
+		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1);
			return false;
		}
@@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
  * the size of the *user* states. If we use it to size a buffer
  * that we use 'XSAVES' on, we could potentially overflow the
  * buffer because 'XSAVES' saves system states too.
+ *
+ * This also takes compaction into account. So this works for
+ * XSAVEC as well.
  */
-static unsigned int __init get_xsaves_size(void)
+static unsigned int __init get_compacted_size(void)
 {
	unsigned int eax, ebx, ecx, edx;
	/*
@@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void)
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
+	 *
+	 * When XSAVES is not available but XSAVEC is (virt), then there
+	 * are no supervisor states, but XSAVEC still uses compacted
+	 * format.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
@@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void)
 /*
  * Get the total size of the enabled xstates without the independent supervisor
  * features.
  */
-static unsigned int __init get_xsaves_size_no_independent(void)
+static unsigned int __init get_xsave_compacted_size(void)
 {
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	if (!mask)
-		return get_xsaves_size();
+		return get_compacted_size();

	/* Disable independent features. */
	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
@@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
-	size = get_xsaves_size();
+	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
@@ -687,20 +696,21 @@ static int __init init_xstate_size(void)
 {
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
-	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
-	 * XSAVES kernel size includes supervisor states and
-	 * uses compacted format when available.
+	 * XSAVES kernel size includes supervisor states and uses compacted
+	 * format. XSAVEC uses compacted format, but does not save
+	 * supervisor states.
	 *
-	 * XSAVE does not support supervisor states so
-	 * kernel and user size is identical.
+	 * XSAVE[OPT] do not support supervisor states so kernel and user
+	 * size is identical.
	 */
	if (compacted)
-		kernel_size = get_xsaves_size_no_independent();
+		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;
@@ -813,6 +823,9 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

-	fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
-				       XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
+		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+	else
+		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
+					XFEATURE_MASK_SUPERVISOR_SUPPORTED;
@@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

+	/* Set up compaction feature bit */
+	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
+	    cpu_feature_enabled(X86_FEATURE_XSAVES))
+		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
+
	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();
@@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
-		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

 out_disable:
@@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

-	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
			return NULL;
	}
@@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
-	bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
	 * XRSTORS requires these bits set in xcomp_bv, or it will
	 * trigger #GP:
	 */
-	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
		xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
 }
@@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void)
 /* These macros all use (%edi)/(%rdi) as the single memory argument. */
 #define XSAVE		".byte " REX_PREFIX "0x0f,0xae,0x27"
 #define XSAVEOPT	".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVEC		".byte " REX_PREFIX "0x0f,0xc7,0x27"
 #define XSAVES		".byte " REX_PREFIX "0x0f,0xc7,0x2f"
 #define XRSTOR		".byte " REX_PREFIX "0x0f,0xae,0x2f"
 #define XRSTORS	".byte " REX_PREFIX "0x0f,0xc7,0x1f"
@@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void)
		     : "memory")

 /*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
+ * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor
+ * states in addition to XSAVEC.
+ *
+ * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports
+ * compacted storage format in addition to XSAVEOPT.
  *
  * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
  * supports modified optimization which is not supported by XSAVE.
@@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void)
  * address of the instruction where we might get an exception at.
  */
 #define XSTATE_XSAVE(st, lmask, hmask, err)				\
-	asm volatile(ALTERNATIVE_2(XSAVE,				\
+	asm volatile(ALTERNATIVE_3(XSAVE,				\
				   XSAVEOPT, X86_FEATURE_XSAVEOPT,	\
+				   XSAVEC, X86_FEATURE_XSAVEC,		\
				   XSAVES, X86_FEATURE_XSAVES)		\
		     "\n"						\
		     "xor %[err], %[err]\n"				\
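With the above in place, a guest that enumerates XSAVEC but not XSAVES is
expected to select the compacted format, so the pr_info() changed above would
print along the lines of (feature mask and size are illustrative values):

  x86/fpu: Enabled xstate features 0x207, context size is 840 bytes, using 'compacted' format.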