Commit 3ef3ace4 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86_cpu_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu updates from Borislav Petkov:

 - Split MTRR and PAT init code to accommodate at least Xen PV and TDX
   guests which do not get MTRRs exposed but only PAT. (TDX guests do
   not support the cache disabling dance when setting up MTRRs so they
   fall under the same category)

   This is cleanup work to remove all the ugly workarounds for such
   guests and to initialize things separately (Juergen Gross)

 - Add two new Intel CPUs to the list of CPUs with "normal" Energy
   Performance Bias, leading to power savings

 - Do not do bus master arbitration in C3 (ARB_DISABLE) on modern
   Centaur CPUs

* tag 'x86_cpu_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  x86/mtrr: Make message for disabled MTRRs more descriptive
  x86/pat: Handle TDX guest PAT initialization
  x86/cpuid: Carve out all CPUID functionality
  x86/cpu: Switch to cpu_feature_enabled() for X86_FEATURE_XENPV
  x86/cpu: Remove X86_FEATURE_XENPV usage in setup_cpu_entry_area()
  x86/cpu: Drop 32-bit Xen PV guest code in update_task_stack()
  x86/cpu: Remove unneeded 64-bit dependency in arch_enter_from_user_mode()
  x86/cpufeatures: Add X86_FEATURE_XENPV to disabled-features.h
  x86/acpi/cstate: Optimize ARB_DISABLE on Centaur CPUs
  x86/mtrr: Simplify mtrr_ops initialization
  x86/cacheinfo: Switch cache_ap_init() to hotplug callback
  x86: Decouple PAT and MTRR handling
  x86/mtrr: Add a stop_machine() handler calling only cache_cpu_init()
  x86/mtrr: Let cache_aps_delayed_init replace mtrr_aps_delayed_init
  x86/mtrr: Get rid of __mtrr_enabled bool
  x86/mtrr: Simplify mtrr_bp_init()
  x86/mtrr: Remove set_all callback from struct mtrr_ops
  x86/mtrr: Disentangle MTRR init from PAT init
  x86/mtrr: Move cache control code to cacheinfo.c
  x86/mtrr: Split MTRR-specific handling from cache dis/enabling
  ...
parents 4eb77fa1 7882b69e
......@@ -2,7 +2,20 @@
#ifndef _ASM_X86_CACHEINFO_H
#define _ASM_X86_CACHEINFO_H
/*
 * Kernel controls MTRR and/or PAT MSRs.
 *
 * Bit mask made up of the CACHE_* flags below. The cache_*_init() and
 * cache_bp_restore() code in cacheinfo.c does nothing beyond the BP setup
 * calls when this is zero.
 */
extern unsigned int memory_caching_control;
/* Flag bits for memory_caching_control. */
#define CACHE_MTRR 0x01
#define CACHE_PAT 0x02
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu);
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu);
/*
 * Disable / re-enable caching on the calling CPU around MTRR and PAT MSR
 * changes. To be used as a pair with local interrupts disabled:
 * cache_disable() takes a raw spinlock that only cache_enable() releases.
 */
void cache_disable(void);
void cache_enable(void);
/* Setter/getter for the flag that delays cache initialization of the APs. */
void set_cache_aps_delayed_init(bool val);
bool get_cache_aps_delayed_init(void);
/*
 * Boot CPU: runs mtrr_bp_init() and pat_bp_init(), then programs this CPU
 * if memory_caching_control is non-zero.
 */
void cache_bp_init(void);
/* Programs the calling CPU again if memory_caching_control is non-zero. */
void cache_bp_restore(void);
/*
 * Performs the delayed cache initialization through stop_machine() on the
 * online CPUs and then clears the delayed-init flag.
 */
void cache_aps_init(void);
#endif /* _ASM_X86_CACHEINFO_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* CPUID-related helpers/definitions
*
* Derived from arch/x86/kvm/cpuid.c
*/
#ifndef _ASM_X86_CPUID_H
#define _ASM_X86_CPUID_H
#include <asm/string.h>
/* The four 32-bit registers (EAX, EBX, ECX, EDX) of one CPUID invocation. */
struct cpuid_regs {
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
};
/* Symbolic indices for the CPUID registers, in EAX, EBX, ECX, EDX order. */
enum cpuid_regs_idx {
	CPUID_EAX = 0,
	CPUID_EBX = 1,
	CPUID_ECX = 2,
	CPUID_EDX = 3,
};
/*
 * have_cpuid_p(): on CONFIG_X86_32 builds the function is provided out of
 * line (defined elsewhere); every other build gets the inline version below,
 * which unconditionally returns 1.
 */
#ifndef CONFIG_X86_32
static inline int have_cpuid_p(void)
{
	return 1;
}
#else
extern int have_cpuid_p(void);
#endif
/*
 * Execute the CPUID instruction directly.
 *
 * On entry, *eax and *ecx supply the input values for EAX and ECX (they are
 * tied to output operands 0 and 2). On return, all four pointers hold the
 * EAX/EBX/ECX/EDX values left by the instruction; *ebx and *edx are outputs
 * only. The asm additionally declares a "memory" clobber.
 */
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx)
: "memory");
}
/*
 * Generator for native_cpuid_<reg>(op): calls native_cpuid() with EAX = op
 * and ECX = 0 and returns only the register named by @reg (one of eax, ebx,
 * ecx, edx - the names of the locals declared in the generated function).
 */
#define native_cpuid_reg(reg) \
static inline unsigned int native_cpuid_##reg(unsigned int op) \
{ \
unsigned int eax = op, ebx, ecx = 0, edx; \
\
native_cpuid(&eax, &ebx, &ecx, &edx); \
\
return reg; \
}
/*
* Native CPUID functions returning a single datum.
*/
native_cpuid_reg(eax)
native_cpuid_reg(ebx)
native_cpuid_reg(ecx)
native_cpuid_reg(edx)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
#endif
/*
 * Generic CPUID function.
 *
 * Stores @op in *eax and zero in *ecx, then passes all four register
 * pointers to __cpuid(), which leaves the results in them.
 *
 * %ecx is cleared explicitly because some CPUs (Cyrix MII) neither set nor
 * clear %ecx, which would otherwise result in stale register contents being
 * returned.
 */
static inline void cpuid(unsigned int op,
			 unsigned int *eax, unsigned int *ebx,
			 unsigned int *ecx, unsigned int *edx)
{
	*eax = op;
	*ecx = 0;

	__cpuid(eax, ebx, ecx, edx);
}
/*
 * CPUID with an explicit ECX input.
 *
 * Some CPUID calls want 'count' to be placed in ecx: stores @op in *eax and
 * @count in *ecx before passing the four register pointers to __cpuid().
 */
static inline void cpuid_count(unsigned int op, int count,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	*eax = op;
	*ecx = count;

	__cpuid(eax, ebx, ecx, edx);
}
/*
* CPUID functions returning a single datum
*/
/* Run cpuid() for @op (ECX input cleared to 0) and return the EAX result. */
static inline unsigned int cpuid_eax(unsigned int op)
{
	unsigned int a, b, c, d;

	cpuid(op, &a, &b, &c, &d);

	return a;
}
/* Run cpuid() for @op (ECX input cleared to 0) and return the EBX result. */
static inline unsigned int cpuid_ebx(unsigned int op)
{
	unsigned int a, b, c, d;

	cpuid(op, &a, &b, &c, &d);

	return b;
}
/* Run cpuid() for @op (ECX input cleared to 0) and return the ECX result. */
static inline unsigned int cpuid_ecx(unsigned int op)
{
	unsigned int a, b, c, d;

	cpuid(op, &a, &b, &c, &d);

	return c;
}
/* Run cpuid() for @op (ECX input cleared to 0) and return the EDX result. */
static inline unsigned int cpuid_edx(unsigned int op)
{
	unsigned int a, b, c, d;

	cpuid(op, &a, &b, &c, &d);

	return d;
}
static __always_inline bool cpuid_function_is_indexed(u32 function)
{
switch (function) {
......@@ -31,4 +150,22 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
return false;
}
#define for_each_possible_hypervisor_cpuid_base(function) \
for (function = 0x40000000; function < 0x40010000; function += 0x100)
/*
 * Search the candidate hypervisor CPUID bases for a given signature.
 *
 * @sig:    12-byte signature, compared against the EBX/ECX/EDX values
 *          returned for each candidate base leaf.
 * @leaves: minimum value required for (EAX - base) on the matching leaf;
 *          0 disables this check.
 *
 * Returns the first matching base leaf, or 0 if none matches.
 */
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
	uint32_t leaf, max_leaf, vendor[3];

	for_each_possible_hypervisor_cpuid_base(leaf) {
		cpuid(leaf, &max_leaf, &vendor[0], &vendor[1], &vendor[2]);

		/* Signature mismatch: try the next candidate base. */
		if (memcmp(sig, vendor, 12))
			continue;

		if (leaves == 0 || (max_leaf - leaf) >= leaves)
			return leaf;
	}

	return 0;
}
#endif /* _ASM_X86_CPUID_H */
......@@ -81,6 +81,12 @@
# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
#endif
/*
 * With CONFIG_XEN_PV the XENPV feature bit is left alone (mask of 0);
 * without it, the bit for X86_FEATURE_XENPV within its 32-bit feature word
 * is contributed to the disabled-features mask.
 */
#ifdef CONFIG_XEN_PV
# define DISABLE_XENPV 0
#else
# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
#endif
#ifdef CONFIG_INTEL_TDX_GUEST
# define DISABLE_TDX_GUEST 0
#else
......@@ -98,7 +104,7 @@
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
......
......@@ -24,8 +24,8 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
/*
* For !SMAP hardware we patch out CLAC on entry.
*/
if (boot_cpu_has(X86_FEATURE_SMAP) ||
(IS_ENABLED(CONFIG_64BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
if (cpu_feature_enabled(X86_FEATURE_SMAP) ||
cpu_feature_enabled(X86_FEATURE_XENPV))
mask |= X86_EFLAGS_AC;
WARN_ON_ONCE(flags & mask);
......
......@@ -6,9 +6,8 @@
#include <asm/pgtable_types.h>
extern bool pat_enabled(void);
extern void pat_disable(const char *reason);
extern void pat_init(void);
extern void init_cache_modes(void);
extern void pat_bp_init(void);
extern void pat_cpu_init(void);
extern int memtype_reserve(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
......
......@@ -793,6 +793,7 @@
#define ENERGY_PERF_BIAS_PERFORMANCE 0
#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
#define ENERGY_PERF_BIAS_NORMAL 6
#define ENERGY_PERF_BIAS_NORMAL_POWERSAVE 7
#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
#define ENERGY_PERF_BIAS_POWERSAVE 15
......
......@@ -25,13 +25,12 @@
#include <uapi/asm/mtrr.h>
void mtrr_bp_init(void);
/*
* The following functions are for use by other drivers that cannot use
* arch_phys_wc_add and arch_phys_wc_del.
*/
# ifdef CONFIG_MTRR
void mtrr_bp_init(void);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
......@@ -42,12 +41,12 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern void mtrr_ap_init(void);
extern void set_mtrr_aps_delayed_init(void);
extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
void mtrr_disable(void);
void mtrr_enable(void);
void mtrr_generic_set_state(void);
# else
static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
{
......@@ -83,10 +82,11 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
#define mtrr_ap_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
#define mtrr_disable() do {} while (0)
#define mtrr_enable() do {} while (0)
#define mtrr_generic_set_state() do {} while (0)
# endif
#ifdef CONFIG_COMPAT
......
......@@ -16,6 +16,7 @@ struct vm86;
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeatures.h>
#include <asm/cpuid.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
......@@ -146,17 +147,6 @@ struct cpuinfo_x86 {
unsigned initialized : 1;
} __randomize_layout;
struct cpuid_regs {
u32 eax, ebx, ecx, edx;
};
enum cpuid_regs_idx {
CPUID_EAX = 0,
CPUID_EBX,
CPUID_ECX,
CPUID_EDX,
};
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
......@@ -205,45 +195,6 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
#ifdef CONFIG_X86_32
extern int have_cpuid_p(void);
#else
static inline int have_cpuid_p(void)
{
return 1;
}
#endif
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx)
: "memory");
}
#define native_cpuid_reg(reg) \
static inline unsigned int native_cpuid_##reg(unsigned int op) \
{ \
unsigned int eax = op, ebx, ecx = 0, edx; \
\
native_cpuid(&eax, &ebx, &ecx, &edx); \
\
return reg; \
}
/*
* Native CPUID functions returning a single datum.
*/
native_cpuid_reg(eax)
native_cpuid_reg(ebx)
native_cpuid_reg(ecx)
native_cpuid_reg(edx)
/*
* Friendlier CR3 helpers.
*/
......@@ -578,7 +529,6 @@ static __always_inline bool on_thread_stack(void)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
static inline void load_sp0(unsigned long sp0)
{
......@@ -589,69 +539,6 @@ static inline void load_sp0(unsigned long sp0)
unsigned long __get_wchan(struct task_struct *p);
/*
* Generic CPUID function
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
* resulting in stale register contents being returned.
*/
static inline void cpuid(unsigned int op,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
*eax = op;
*ecx = 0;
__cpuid(eax, ebx, ecx, edx);
}
/* Some CPUID calls want 'count' to be placed in ecx */
static inline void cpuid_count(unsigned int op, int count,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
*eax = op;
*ecx = count;
__cpuid(eax, ebx, ecx, edx);
}
/*
* CPUID functions returning a single datum
*/
static inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return eax;
}
static inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return ebx;
}
static inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return ecx;
}
static inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return edx;
}
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void amd_e400_c1e_apic_setup(void);
......@@ -805,24 +692,6 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
#endif
#define for_each_possible_hypervisor_cpuid_base(function) \
for (function = 0x40000000; function < 0x40010000; function += 0x100)
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
uint32_t base, eax, signature[3];
for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
if (!memcmp(sig, signature, 12) &&
(leaves == 0 || ((eax - base) >= leaves)))
return base;
}
return 0;
}
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
extern void free_kernel_image_pages(const char *what, void *begin, void *end);
......
......@@ -66,13 +66,10 @@ static inline void update_task_stack(struct task_struct *task)
{
/* sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task->thread.sp0);
else
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
/* Xen PV enters the kernel on the thread stack. */
if (static_cpu_has(X86_FEATURE_XENPV))
if (cpu_feature_enabled(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
}
......
......@@ -52,17 +52,25 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
if (c->x86_vendor == X86_VENDOR_INTEL &&
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
flags->bm_control = 0;
/*
* For all recent Centaur CPUs, the ucode will make sure that each
* core can keep cache coherence with each other while entering C3
* type state. So, set bm_check to 1 to indicate that the kernel
* doesn't need to execute a cache flush operation (WBINVD) when
* entering C3 type state.
*/
if (c->x86_vendor == X86_VENDOR_CENTAUR) {
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
c->x86_stepping >= 0x0e))
c->x86_stepping >= 0x0e)) {
/*
* For all recent Centaur CPUs, the ucode will make sure that each
* core can keep cache coherence with each other while entering C3
* type state. So, set bm_check to 1 to indicate that the kernel
* doesn't need to execute a cache flush operation (WBINVD) when
* entering C3 type state.
*/
flags->bm_check = 1;
/*
* For all recent Centaur platforms, ARB_DISABLE is a nop.
* Set bm_control to zero to indicate that ARB_DISABLE is
* not required while entering C3 type state.
*/
flags->bm_control = 0;
}
}
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
......
......@@ -983,7 +983,7 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
if (!cpu_feature_enabled(X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
/*
......
......@@ -1302,7 +1302,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
if (cmd == SPECTRE_V2_CMD_IBRS && cpu_feature_enabled(X86_FEATURE_XENPV)) {
pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
......@@ -2206,74 +2206,74 @@ static const char * const l1tf_vmx_states[] = {
static ssize_t l1tf_show_state(char *buf)
{
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG);
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
sched_smt_active())) {
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
}
return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t itlb_multihit_show_state(char *buf)
{
if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
!boot_cpu_has(X86_FEATURE_VMX))
return sprintf(buf, "KVM: Mitigation: VMX unsupported\n");
return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n");
else if (!(cr4_read_shadow() & X86_CR4_VMXE))
return sprintf(buf, "KVM: Mitigation: VMX disabled\n");
return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n");
else if (itlb_multihit_kvm_mitigation)
return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n");
else
return sprintf(buf, "KVM: Vulnerable\n");
return sysfs_emit(buf, "KVM: Vulnerable\n");
}
#else
static ssize_t l1tf_show_state(char *buf)
{
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG);
}
static ssize_t itlb_multihit_show_state(char *buf)
{
return sprintf(buf, "Processor vulnerable\n");
return sysfs_emit(buf, "Processor vulnerable\n");
}
#endif
static ssize_t mds_show_state(char *buf)
{
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
return sysfs_emit(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
(mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
sched_smt_active() ? "mitigated" : "disabled"));
return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
(mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
sched_smt_active() ? "mitigated" : "disabled"));
}
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t tsx_async_abort_show_state(char *buf)
{
if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
(taa_mitigation == TAA_MITIGATION_OFF))
return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
taa_strings[taa_mitigation]);
return sysfs_emit(buf, "%s; SMT Host state unknown\n",
taa_strings[taa_mitigation]);
}
return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t mmio_stale_data_show_state(char *buf)
......@@ -2341,73 +2341,72 @@ static char *pbrsb_eibrs_state(void)
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
return sprintf(buf, "Vulnerable: LFENCE\n");
return sysfs_emit(buf, "Vulnerable: LFENCE\n");
if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
if (sched_smt_active() && unprivileged_ebpf_enabled() &&
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
return sprintf(buf, "%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
pbrsb_eibrs_state(),
spectre_v2_module_string());
return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
pbrsb_eibrs_state(),
spectre_v2_module_string());
}
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
static ssize_t retbleed_show_state(char *buf)
{
if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
return sprintf(buf, "%s; SMT %s\n",
retbleed_strings[retbleed_mitigation],
!sched_smt_active() ? "disabled" :
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
"enabled with STIBP protection" : "vulnerable");
return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation],
!sched_smt_active() ? "disabled" :
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
"enabled with STIBP protection" : "vulnerable");
}
return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
return sysfs_emit(buf, "Not affected\n");
switch (bug) {
case X86_BUG_CPU_MELTDOWN:
if (boot_cpu_has(X86_FEATURE_PTI))
return sprintf(buf, "Mitigation: PTI\n");
return sysfs_emit(buf, "Mitigation: PTI\n");
if (hypervisor_is_type(X86_HYPER_XEN_PV))
return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
break;
case X86_BUG_SPECTRE_V1:
return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
return spectre_v2_show_state(buf);
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]);
case X86_BUG_L1TF:
if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
......@@ -2437,7 +2436,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
break;
}
return sprintf(buf, "Vulnerable\n");
return sysfs_emit(buf, "Vulnerable\n");
}
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
......
......@@ -11,15 +11,19 @@
#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>
#include <linux/stop_machine.h>
#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include "cpu.h"
......@@ -35,6 +39,9 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;
struct _cache_table {
unsigned char descriptor;
char cache_type;
......@@ -1040,3 +1047,175 @@ int populate_cache_leaves(unsigned int cpu)
return 0;
}
/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts:
 * they would run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are reenabled after cache_enable() has been called.
 */
/* CR4 value saved by cache_disable() and written back by cache_enable(). */
static unsigned long saved_cr4;
/* Taken in cache_disable(), released in cache_enable(). */
static DEFINE_RAW_SPINLOCK(cache_disable_lock);
/*
 * Disable caching on the calling CPU.
 *
 * Takes cache_disable_lock and returns with it held; the matching
 * cache_enable() call drops it. Sets CR0.CD, clears CR4.PGE when the PGE
 * feature is enabled (remembering the old CR4 in saved_cr4), flushes the
 * local TLB and calls mtrr_disable() when the MTRR feature is enabled.
 * Caches are written back and invalidated (WBINVD) only on CPUs without
 * cache self-snooping.
 */
void cache_disable(void) __acquires(cache_disable_lock)
{
unsigned long cr0;
/*
 * Note that this is not ideal, since the cache is only
 * flushed/disabled for this CPU while the MTRRs are changed, but
 * changing this requires more invasive changes to the way the
 * kernel boots.
 */
raw_spin_lock(&cache_disable_lock);
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
/*
* Cache flushing is the most time-consuming step when programming
* the MTRRs. Fortunately, as per the Intel Software Development
* Manual, we can skip it if the processor supports cache self-
* snooping.
*/
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_feature_enabled(X86_FEATURE_PGE)) {
saved_cr4 = __read_cr4();
__write_cr4(saved_cr4 & ~X86_CR4_PGE);
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
flush_tlb_local();
/* MTRR-specific part of the sequence, only when MTRRs are available. */
if (cpu_feature_enabled(X86_FEATURE_MTRR))
mtrr_disable();
/* Again, only flush caches if we have to. */
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
wbinvd();
}
/*
 * Re-enable caching on the calling CPU; counterpart of cache_disable().
 *
 * Flushes the local TLB, calls mtrr_enable() when the MTRR feature is
 * enabled, clears CR0.CD, writes saved_cr4 back to CR4 when the PGE feature
 * is enabled and finally releases cache_disable_lock, which the preceding
 * cache_disable() call acquired.
 */
void cache_enable(void) __releases(cache_disable_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
flush_tlb_local();
/* MTRR-specific part of the sequence, only when MTRRs are available. */
if (cpu_feature_enabled(X86_FEATURE_MTRR))
mtrr_enable();
/* Enable caches */
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_feature_enabled(X86_FEATURE_PGE))
__write_cr4(saved_cr4);
raw_spin_unlock(&cache_disable_lock);
}
/*
 * Program the cache control state of the calling CPU.
 *
 * With local interrupts off and caching disabled, applies the MTRR state
 * and/or runs the per-CPU PAT init, depending on which of CACHE_MTRR and
 * CACHE_PAT is set in memory_caching_control. Caching and the previous
 * interrupt state are restored afterwards.
 */
static void cache_cpu_init(void)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	cache_disable();

	if (memory_caching_control & CACHE_MTRR)
		mtrr_generic_set_state();

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	cache_enable();
	local_irq_restore(irq_state);
}
/*
 * True while cache initialization of the APs is being delayed; starts out
 * true. cache_ap_init() does nothing while it is set, and cache_aps_init()
 * clears it once the delayed initialization has run.
 */
static bool cache_aps_delayed_init = true;

/* Set the delayed-AP-init flag to @val. */
void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

/* Return the current value of the delayed-AP-init flag. */
bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}
/*
 * Callback handed to stop_machine() / stop_machine_from_inactive_cpu().
 *
 * Runs cache_cpu_init() on the calling CPU if AP initialization is still
 * being delayed, or if the calling CPU is not marked online. Always
 * returns 0; @unused is ignored.
 */
static int cache_rendezvous_handler(void *unused)
{
	bool init_this_cpu = get_cache_aps_delayed_init() ||
			     !cpu_online(smp_processor_id());

	if (init_this_cpu)
		cache_cpu_init();

	return 0;
}
/*
 * Boot CPU cache control setup: run the MTRR and PAT boot-time init, then
 * program this CPU if the kernel controls MTRR and/or PAT
 * (memory_caching_control is non-zero).
 */
void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (!memory_caching_control)
		return;

	cache_cpu_init();
}
/*
 * Program the calling CPU's cache control state again, provided the kernel
 * controls MTRR and/or PAT (memory_caching_control is non-zero).
 */
void cache_bp_restore(void)
{
	if (!memory_caching_control)
		return;

	cache_cpu_init();
}
/*
 * CPU hotplug callback (registered by cache_ap_register()).
 *
 * Does nothing if the kernel controls neither MTRR nor PAT, or while AP
 * initialization is being delayed. Otherwise runs
 * cache_rendezvous_handler() via stop_machine_from_inactive_cpu() on the
 * CPUs in cpu_callout_mask. Always returns 0; @cpu is not used.
 */
static int cache_ap_init(unsigned int cpu)
{
	if (!memory_caching_control)
		return 0;

	if (get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally mtrr_mutex would be held here to keep the MTRR entries
	 * from changing, but this routine is called at CPU boot time and
	 * holding the lock breaks it.
	 *
	 * This routine is called in two cases:
	 *
	 * 1. Very early during software resume, when there are absolutely
	 *    no MTRR entry changes.
	 *
	 * 2. At CPU hot-add time. mtrr_add/del_page hold the cpuhotplug
	 *    lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_callout_mask);

	return 0;
}
/*
 * Delayed cache initialization for all APs.
 *
 * Only acts when the kernel controls MTRR and/or PAT and the delayed-init
 * flag is still set: runs cache_rendezvous_handler() through stop_machine()
 * on the online CPUs, then clears the flag.
 */
void cache_aps_init(void)
{
	if (!memory_caching_control)
		return;

	if (!get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}
/*
 * Initcall: register cache_ap_init() for the CPUHP_AP_CACHECTRL_STARTING
 * hotplug state. The last argument to the registration call is NULL.
 * The result of the registration is not checked; always returns 0.
 */
static int __init cache_ap_register(void)
{
	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_init, NULL);

	return 0;
}
core_initcall(cache_ap_register);
......@@ -52,6 +52,7 @@
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/cacheinfo.h>
#include <asm/memtype.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
......@@ -1948,7 +1949,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_32
enable_sep_cpu();
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
......
......@@ -339,7 +339,7 @@ static void init_hygon(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARAT);
/* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
if (!cpu_feature_enabled(X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
check_null_seg_clears_base(c);
......
......@@ -204,7 +204,12 @@ static int intel_epb_offline(unsigned int cpu)
}
static const struct x86_cpu_id intel_epb_normal[] = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 7),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,
ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
{}
};
......
......@@ -109,7 +109,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
return 0;
}
static const struct mtrr_ops amd_mtrr_ops = {
const struct mtrr_ops amd_mtrr_ops = {
.vendor = X86_VENDOR_AMD,
.set = amd_set_mtrr,
.get = amd_get_mtrr,
......@@ -117,9 +117,3 @@ static const struct mtrr_ops amd_mtrr_ops = {
.validate_add_page = amd_validate_add_page,
.have_wrcomb = positive_have_wrcomb,
};
int __init amd_init_mtrr(void)
{
set_mtrr_ops(&amd_mtrr_ops);
return 0;
}
......@@ -111,7 +111,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
return 0;
}
static const struct mtrr_ops centaur_mtrr_ops = {
const struct mtrr_ops centaur_mtrr_ops = {
.vendor = X86_VENDOR_CENTAUR,
.set = centaur_set_mcr,
.get = centaur_get_mcr,
......@@ -119,9 +119,3 @@ static const struct mtrr_ops centaur_mtrr_ops = {
.validate_add_page = centaur_validate_add_page,
.have_wrcomb = positive_have_wrcomb,
};
int __init centaur_init_mtrr(void)
{
set_mtrr_ops(&centaur_mtrr_ops);
return 0;
}
......@@ -234,51 +234,11 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
post_set();
}
typedef struct {
unsigned long base;
unsigned long size;
mtrr_type type;
} arr_state_t;
static arr_state_t arr_state[8] = {
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
};
static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
static void cyrix_set_all(void)
{
int i;
prepare_set();
/* the CCRs are not contiguous */
for (i = 0; i < 4; i++)
setCx86(CX86_CCR0 + i, ccr_state[i]);
for (; i < 7; i++)
setCx86(CX86_CCR4 + i, ccr_state[i]);
for (i = 0; i < 8; i++) {
cyrix_set_arr(i, arr_state[i].base,
arr_state[i].size, arr_state[i].type);
}
post_set();
}
static const struct mtrr_ops cyrix_mtrr_ops = {
const struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
.set_all = cyrix_set_all,
.set = cyrix_set_arr,
.get = cyrix_get_arr,
.get_free_region = cyrix_get_free_region,
.validate_add_page = generic_validate_add_page,
.have_wrcomb = positive_have_wrcomb,
};
int __init cyrix_init_mtrr(void)
{
set_mtrr_ops(&cyrix_mtrr_ops);
return 0;
}
......@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <asm/processor-flags.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
......@@ -396,9 +397,6 @@ print_fixed(unsigned base, unsigned step, const mtrr_type *types)
}
}
static void prepare_set(void);
static void post_set(void);
static void __init print_mtrr_state(void)
{
unsigned int i;
......@@ -444,20 +442,6 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
/* PAT setup for BP. We need to go through sync steps here */
void __init mtrr_bp_pat_init(void)
{
unsigned long flags;
local_irq_save(flags);
prepare_set();
pat_init();
post_set();
local_irq_restore(flags);
}
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
......@@ -684,7 +668,10 @@ static u32 deftype_lo, deftype_hi;
/**
* set_mtrr_state - Set the MTRR state for this CPU.
*
* NOTE: The CPU must already be in a safe state for MTRR changes.
* NOTE: The CPU must already be in a safe state for MTRR changes, including
* measures that only a single CPU can be active in set_mtrr_state() in
* order to not be subject to races for usage of deftype_lo. This is
* accomplished by taking cache_disable_lock.
* RETURNS: 0 if no changes made, else a mask indicating what was changed.
*/
static unsigned long set_mtrr_state(void)
......@@ -715,106 +702,34 @@ static unsigned long set_mtrr_state(void)
return change_mask;
}
static unsigned long cr4;
static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache, don't allow any interrupts:
* they would run extremely slowly and would only increase the pain.
*
* The caller must ensure that local interrupts are disabled and
* are reenabled after post_set() has been called.
*/
static void prepare_set(void) __acquires(set_atomicity_lock)
void mtrr_disable(void)
{
unsigned long cr0;
/*
* Note that this is not ideal
* since the cache is only flushed/disabled for this CPU while the
* MTRRs are changed, but changing this requires more invasive
* changes to the way the kernel boots
*/
raw_spin_lock(&set_atomicity_lock);
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
/*
* Cache flushing is the most time-consuming step when programming
* the MTRRs. Fortunately, as per the Intel Software Development
* Manual, we can skip it if the processor supports cache self-
* snooping.
*/
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
flush_tlb_local();
/* Save MTRR state */
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
/* Again, only flush caches if we have to. */
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
wbinvd();
}
static void post_set(void) __releases(set_atomicity_lock)
void mtrr_enable(void)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
flush_tlb_local();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
static void generic_set_all(void)
void mtrr_generic_set_state(void)
{
unsigned long mask, count;
unsigned long flags;
local_irq_save(flags);
prepare_set();
/* Actually set the state */
mask = set_mtrr_state();
/* also set PAT */
pat_init();
post_set();
local_irq_restore(flags);
/* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof(mask) * 8; ++count) {
if (mask & 0x01)
set_bit(count, &smp_changes_mask);
mask >>= 1;
}
}
/**
......@@ -836,7 +751,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
vr = &mtrr_state.var_ranges[reg];
local_irq_save(flags);
prepare_set();
cache_disable();
if (size == 0) {
/*
......@@ -855,7 +770,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
}
post_set();
cache_enable();
local_irq_restore(flags);
}
......@@ -914,8 +829,6 @@ int positive_have_wrcomb(void)
* Generic structure...
*/
const struct mtrr_ops generic_mtrr_ops = {
.use_intel_if = 1,
.set_all = generic_set_all,
.get = generic_get_mtrr,
.get_free_region = generic_get_free_region,
.set = generic_set_mtrr,
......
......@@ -46,6 +46,7 @@
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
......@@ -58,32 +59,18 @@
#define MTRR_TO_PHYS_WC_OFFSET 1000
u32 num_var_ranges;
static bool __mtrr_enabled;
static bool mtrr_enabled(void)
{
return __mtrr_enabled;
return !!mtrr_if;
}
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
const struct mtrr_ops *mtrr_if;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
/*
 * Record @ops in the mtrr_ops[] table, indexed by ops->vendor.
 *
 * The pointer is stored only when ops->vendor is non-zero and smaller than
 * X86_VENDOR_NUM; for any other vendor value the call does nothing.
 */
void __init set_mtrr_ops(const struct mtrr_ops *ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
mtrr_ops[ops->vendor] = ops;
}
/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
......@@ -119,11 +106,11 @@ static int have_wrcomb(void)
}
/* This function returns the number of variable MTRRs */
static void __init set_num_var_ranges(void)
static void __init set_num_var_ranges(bool use_generic)
{
unsigned long config = 0, dummy;
if (use_intel())
if (use_generic)
rdmsr(MSR_MTRRcap, config, dummy);
else if (is_cpu(AMD) || is_cpu(HYGON))
config = 2;
......@@ -160,25 +147,8 @@ static int mtrr_rendezvous_handler(void *info)
{
struct set_mtrr_data *data = info;
/*
* We use this same function to initialize the mtrrs during boot,
* resume, runtime cpu online and on an explicit request to set a
* specific MTRR.
*
* During boot or suspend, the state of the boot cpu's mtrrs has been
* saved, and we want to replicate that across all the cpus that come
* online (either at the end of boot or resume or during a runtime cpu
* online). If we're doing that, @reg is set to something special and on
* all the cpu's we do mtrr_if->set_all() (On the logical cpu that
* started the boot/resume sequence, this might be a duplicate
* set_all()).
*/
if (data->smp_reg != ~0U) {
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
mtrr_if->set_all();
}
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
return 0;
}
......@@ -248,19 +218,6 @@ static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
}
/*
 * Package @reg, @base, @size and @type into a struct set_mtrr_data and run
 * mtrr_rendezvous_handler() on it through stop_machine_from_inactive_cpu(),
 * passing cpu_callout_mask as the CPU mask.
 */
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
{
struct set_mtrr_data data = { .smp_reg = reg,
.smp_base = base,
.smp_size = size,
.smp_type = type
};
stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
cpu_callout_mask);
}
/**
* mtrr_add_page - Add a memory type region
* @base: Physical base address of region in pages (in units of 4 kB!)
......@@ -617,20 +574,6 @@ int arch_phys_wc_index(int handle)
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);
/*
* HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
static void __init init_ifs(void)
{
/*
 * The AMD, Cyrix and Centaur init functions are called only when
 * CONFIG_X86_64 is not defined; with it defined this function is empty.
 */
#ifndef CONFIG_X86_64
amd_init_mtrr();
cyrix_init_mtrr();
centaur_init_mtrr();
#endif
}
/* The suspend/resume methods are only for CPUs without MTRR. CPUs using the
* generic MTRR driver don't require this.
*/
......@@ -686,10 +629,9 @@ int __initdata changed_by_mtrr_cleanup;
*/
void __init mtrr_bp_init(void)
{
const char *why = "(not available)";
u32 phys_addr;
init_ifs();
phys_addr = 32;
if (boot_cpu_has(X86_FEATURE_MTRR)) {
......@@ -730,21 +672,21 @@ void __init mtrr_bp_init(void)
case X86_VENDOR_AMD:
if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
mtrr_if = &amd_mtrr_ops;
size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CENTAUR:
if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
mtrr_if = &centaur_mtrr_ops;
size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
mtrr_if = &cyrix_mtrr_ops;
size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
......@@ -754,58 +696,23 @@ void __init mtrr_bp_init(void)
}
}
if (mtrr_if) {
__mtrr_enabled = true;
set_num_var_ranges();
if (mtrr_enabled()) {
set_num_var_ranges(mtrr_if == &generic_mtrr_ops);
init_table();
if (use_intel()) {
if (mtrr_if == &generic_mtrr_ops) {
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
if (mtrr_enabled())
mtrr_bp_pat_init();
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
if (get_mtrr_state()) {
memory_caching_control |= CACHE_MTRR;
changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr);
} else {
mtrr_if = NULL;
why = "by BIOS";
}
}
}
if (!mtrr_enabled()) {
pr_info("Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
* Skip PAT init until the handler can initialize both
* features independently.
*/
pat_disable("MTRRs disabled, skipping PAT initialization too.");
}
}
void mtrr_ap_init(void)
{
if (!mtrr_enabled())
return;
if (!use_intel() || mtrr_aps_delayed_init)
return;
/*
* Ideally we should hold mtrr_mutex here to prevent MTRR entries from
* being changed, but this routine is called at CPU boot time, where
* holding the lock would break things.
*
* This routine is called in two cases:
*
* 1. very early during software resume, when there are absolutely
* no MTRR entry changes;
*
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
* lock to prevent mtrr entry changes
*/
set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
pr_info("MTRRs disabled %s\n", why);
}
/**
......@@ -823,50 +730,12 @@ void mtrr_save_state(void)
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}
/*
 * Set the mtrr_aps_delayed_init flag to true.
 *
 * The flag is left untouched when mtrr_enabled() or use_intel() is false.
 */
void set_mtrr_aps_delayed_init(void)
{
if (!mtrr_enabled())
return;
if (!use_intel())
return;
mtrr_aps_delayed_init = true;
}
/*
 * Delayed MTRR initialization for all APs.
 *
 * Returns without doing anything unless use_intel() and mtrr_enabled() both
 * hold and mtrr_aps_delayed_init is set. Otherwise it calls set_mtrr() with
 * reg == ~0U and then clears mtrr_aps_delayed_init.
 */
void mtrr_aps_init(void)
{
if (!use_intel() || !mtrr_enabled())
return;
/*
 * Check if someone has requested the delay of AP MTRR initialization,
 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
 * then we are done.
 */
if (!mtrr_aps_delayed_init)
return;
/* reg == ~0U with base, size and type all 0: not a request for one specific register. */
set_mtrr(~0U, 0, 0, 0);
mtrr_aps_delayed_init = false;
}
/*
 * Call mtrr_if->set_all().
 *
 * Does nothing when use_intel() or mtrr_enabled() is false.
 */
void mtrr_bp_restore(void)
{
if (!use_intel() || !mtrr_enabled())
return;
mtrr_if->set_all();
}
static int __init mtrr_init_finialize(void)
{
if (!mtrr_enabled())
return 0;
if (use_intel()) {
if (memory_caching_control & CACHE_MTRR) {
if (!changed_by_mtrr_cleanup)
mtrr_state_warn();
return 0;
......
......@@ -14,11 +14,8 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
void (*set)(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void (*set_all)(void);
void (*get)(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type);
int (*get_free_region)(unsigned long base, unsigned long size,
......@@ -53,15 +50,11 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
void mtrr_bp_pat_init(void);
extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
extern u64 size_or_mask, size_and_mask;
extern const struct mtrr_ops *mtrr_if;
#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
extern unsigned int num_var_ranges;
extern u64 mtrr_tom2;
......@@ -71,10 +64,10 @@ void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
/* CPU specific mtrr init functions */
int amd_init_mtrr(void);
int cyrix_init_mtrr(void);
int centaur_init_mtrr(void);
/* CPU specific mtrr_ops vectors. */
extern const struct mtrr_ops amd_mtrr_ops;
extern const struct mtrr_ops cyrix_mtrr_ops;
extern const struct mtrr_ops centaur_mtrr_ops;
extern int changed_by_mtrr_cleanup;
extern int mtrr_cleanup(unsigned address_bits);
......@@ -165,7 +165,7 @@ static noinstr unsigned long __rdgsbase_inactive(void)
lockdep_assert_irqs_disabled();
if (!static_cpu_has(X86_FEATURE_XENPV)) {
if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
native_swapgs();
gsbase = rdgsbase();
native_swapgs();
......@@ -190,7 +190,7 @@ static noinstr void __wrgsbase_inactive(unsigned long gsbase)
{
lockdep_assert_irqs_disabled();
if (!static_cpu_has(X86_FEATURE_XENPV)) {
if (!cpu_feature_enabled(X86_FEATURE_XENPV)) {
native_swapgs();
wrgsbase(gsbase);
native_swapgs();
......
......@@ -35,6 +35,7 @@
#include <asm/numa.h>
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
......@@ -1075,23 +1076,12 @@ void __init setup_arch(char **cmdline_p)
max_pfn = e820__end_of_ram_pfn();
/* update e820 for memory not covered by WB MTRRs */
if (IS_ENABLED(CONFIG_MTRR))
mtrr_bp_init();
else
pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
cache_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
max_pfn = e820__end_of_ram_pfn();
max_possible_pfn = max_pfn;
/*
* This call is required when the CPU does not support PAT. If
* mtrr_bp_init() invoked it already via pat_init() the call has no
* effect.
*/
init_cache_modes();
/*
* Define random base addresses for memory sections after max_pfn is
* defined and before each memory section base is used.
......
......@@ -59,6 +59,7 @@
#include <linux/stackprotector.h>
#include <asm/acpi.h>
#include <asm/cacheinfo.h>
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
......@@ -1429,8 +1430,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
uv_system_init();
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
speculative_store_bypass_ht_init();
......@@ -1440,12 +1439,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
void arch_thaw_secondary_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
set_cache_aps_delayed_init(true);
}
void arch_thaw_secondary_cpus_end(void)
{
mtrr_aps_init();
cache_aps_init();
}
/*
......@@ -1488,7 +1487,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
nmi_selftest();
impress_friends();
mtrr_aps_init();
cache_aps_init();
}
static int __initdata setup_possible_cpus = -1;
......
......@@ -106,7 +106,7 @@ int arch_register_cpu(int num)
* Xen PV guests don't support CPU0 hotplug at all.
*/
if (c->x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_has(X86_FEATURE_XENPV))
cpu_feature_enabled(X86_FEATURE_XENPV))
cpu0_hotpluggable = 0;
/*
......
......@@ -138,17 +138,13 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
pgprot_t tss_prot = PAGE_KERNEL_RO;
#else
/*
* On native 32-bit systems, the GDT cannot be read-only because
* On 32-bit systems, the GDT cannot be read-only because
* our double fault handler uses a task gate, and entering through
* a task gate needs to change an available TSS to busy. If the
* GDT is read-only, that will triple fault. The TSS cannot be
* read-only because the CPU writes to it on task switches.
*
* On Xen PV, the GDT must be read-only because the hypervisor
* requires it.
*/
pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
PAGE_KERNEL_RO : PAGE_KERNEL;
pgprot_t gdt_prot = PAGE_KERNEL;
pgprot_t tss_prot = PAGE_KERNEL;
#endif
......
......@@ -43,6 +43,7 @@
#include <linux/rbtree.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
......@@ -60,41 +61,34 @@
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;
static u64 __ro_after_init pat_msr_val;
/*
* PAT support is enabled by default, but can be disabled for
* various user-requested or hardware-forced reasons:
*/
void pat_disable(const char *msg_reason)
static void __init pat_disable(const char *msg_reason)
{
if (pat_disabled)
return;
if (pat_bp_initialized) {
WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
return;
}
pat_disabled = true;
pr_info("x86/PAT: %s\n", msg_reason);
memory_caching_control &= ~CACHE_PAT;
}
static int __init nopat(char *str)
{
pat_disable("PAT support disabled via boot option.");
pat_force_disabled = true;
return 0;
}
early_param("nopat", nopat);
bool pat_enabled(void)
{
return pat_bp_enabled;
return !pat_disabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
......@@ -192,7 +186,8 @@ enum {
#define CM(c) (_PAGE_CACHE_MODE_ ## c)
static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val,
char *msg)
{
enum page_cache_mode cache;
char *cache_mode;
......@@ -219,14 +214,12 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
static void __init_cache_modes(u64 pat)
static void __init init_cache_modes(u64 pat)
{
enum page_cache_mode cache;
char pat_msg[33];
int i;
WARN_ON_ONCE(pat_cm_initialized);
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
......@@ -234,34 +227,9 @@ static void __init_cache_modes(u64 pat)
update_cache_mode_entry(i, cache);
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
pat_cm_initialized = true;
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
/*
 * Write @pat to MSR_IA32_CR_PAT, set pat_bp_enabled and pass @pat on to
 * __init_cache_modes().
 *
 * None of that happens, and pat_disable() is called instead, when the CPU
 * lacks X86_FEATURE_PAT or when the MSR currently reads back as 0.
 */
static void pat_bp_init(u64 pat)
{
u64 tmp_pat;
if (!boot_cpu_has(X86_FEATURE_PAT)) {
pat_disable("PAT not supported by the CPU.");
return;
}
/* Read the current MSR value first; 0 is treated as disabled by the firmware. */
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT support disabled by the firmware.");
return;
}
wrmsrl(MSR_IA32_CR_PAT, pat);
pat_bp_enabled = true;
__init_cache_modes(pat);
}
static void pat_ap_init(u64 pat)
void pat_cpu_init(void)
{
if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
......@@ -271,30 +239,39 @@ static void pat_ap_init(u64 pat)
panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
wrmsrl(MSR_IA32_CR_PAT, pat);
wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
}
void __init init_cache_modes(void)
/**
* pat_bp_init - Initialize the PAT MSR value and PAT table
*
* This function initializes PAT MSR value and PAT table with an OS-defined
* value to enable additional cache attributes, WC, WT and WP.
*
* This function prepares the calls of pat_cpu_init() via cache_cpu_init()
* on all CPUs.
*/
void __init pat_bp_init(void)
{
u64 pat = 0;
struct cpuinfo_x86 *c = &boot_cpu_data;
#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \
(((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \
((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \
((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \
((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56))
if (pat_cm_initialized)
return;
if (boot_cpu_has(X86_FEATURE_PAT)) {
/*
* CPU supports PAT. Set PAT table to be consistent with
* PAT MSR. This case supports "nopat" boot option, and
* virtual machine environments which support PAT without
* MTRRs. Specifically, Xen has a unique setup of the PAT MSR.
*
* If PAT MSR returns 0, it is considered invalid and emulates
* as No PAT.
*/
rdmsrl(MSR_IA32_CR_PAT, pat);
}
if (!IS_ENABLED(CONFIG_X86_PAT))
pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
if (!cpu_feature_enabled(X86_FEATURE_PAT))
pat_disable("PAT not supported by the CPU.");
else
rdmsrl(MSR_IA32_CR_PAT, pat_msr_val);
if (!pat_msr_val) {
pat_disable("PAT support disabled by the firmware.");
if (!pat) {
/*
* No PAT. Emulate the PAT table that corresponds to the two
* cache bits, PWT (Write Through) and PCD (Cache Disable).
......@@ -313,40 +290,22 @@ void __init init_cache_modes(void)
* NOTE: When WC or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
} else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
/*
* Clearly PAT is enabled underneath. Allow pat_enabled() to
* reflect this.
*/
pat_bp_enabled = true;
pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC);
}
__init_cache_modes(pat);
}
/**
* pat_init - Initialize the PAT MSR and PAT table on the current CPU
*
* This function initializes PAT MSR and PAT table with an OS-defined value
* to enable additional cache attributes, WC, WT and WP.
*
* This function must be called on all CPUs using the specific sequence of
* operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
* procedure for PAT.
*/
void pat_init(void)
{
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
#ifndef CONFIG_X86_PAT
pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
#endif
if (pat_disabled)
/*
* Xen PV doesn't allow setting the PAT MSR, but all cache modes are
* supported.
* When running as a TDX guest, setting the PAT MSR won't work either,
* due to the requirement to set CR0.CD when doing so. Rely on the
* firmware to have set the PAT MSR correctly.
*/
if (pat_disabled ||
cpu_feature_enabled(X86_FEATURE_XENPV) ||
cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
init_cache_modes(pat_msr_val);
return;
}
if ((c->x86_vendor == X86_VENDOR_INTEL) &&
(((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
......@@ -371,8 +330,7 @@ void pat_init(void)
* NOTE: When WT or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC);
} else {
/*
* Full PAT support. We put WT in slot 7 to improve
......@@ -400,19 +358,14 @@ void pat_init(void)
* The reserved slots are unused, but mapped to their
* corresponding types in the presence of PAT errata.
*/
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT);
}
if (!pat_bp_initialized) {
pat_bp_init(pat);
pat_bp_initialized = true;
} else {
pat_ap_init(pat);
}
}
memory_caching_control |= CACHE_PAT;
init_cache_modes(pat_msr_val);
#undef PAT
}
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
......
......@@ -23,6 +23,7 @@
#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/cacheinfo.h>
#include <asm/mmu_context.h>
#include <asm/cpu_device_id.h>
#include <asm/microcode.h>
......@@ -261,7 +262,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
do_fpu_end();
tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
cache_bp_restore();
perf_restore_debug_store();
c = &cpu_data(smp_processor_id());
......
......@@ -140,6 +140,7 @@ enum cpuhp_state {
*/
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
CPUHP_AP_CACHECTRL_STARTING,
CPUHP_AP_SCHED_STARTING,
CPUHP_AP_RCUTREE_DYING,
CPUHP_AP_CPU_PM_STARTING,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment