Commit a9648072 authored by Linus Torvalds

Merge tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:
 "Xen fixes and featrues for 4.12. The main changes are:

   - enable building the kernel with Xen support but without enabling
     paravirtualized mode (Vitaly Kuznetsov)

   - add a new 9pfs xen frontend driver (Stefano Stabellini)

   - simplify Xen's cpuid handling by making use of cpu capabilities
     (Juergen Gross)

   - add/modify some headers for new Xen paravirtualized devices
     (Oleksandr Andrushchenko)

   - EFI reset_system support under Xen (Julien Grall)

   - and the usual cleanups and corrections"

* tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (57 commits)
  xen: Move xen_have_vector_callback definition to enlighten.c
  xen: Implement EFI reset_system callback
  arm/xen: Consolidate calls to shutdown hypercall in a single helper
  xen: Export xen_reboot
  xen/x86: Call xen_smp_intr_init_pv() on BSP
  xen: Revert commits da72ff5b and 72a9b186
  xen/pvh: Do not fill kernel's e820 map in init_pvh_bootparams()
  xen/scsifront: use offset_in_page() macro
  xen/arm,arm64: rename __generic_dma_ops to xen_get_dma_ops
  xen/arm,arm64: fix xen_dma_ops after 815dd187 "Consolidate get_dma_ops..."
  xen/9pfs: select CONFIG_XEN_XENBUS_FRONTEND
  x86/cpu: remove hypervisor specific set_cpu_features
  vmware: set cpu capabilities during platform initialization
  x86/xen: use capabilities instead of fake cpuid values for xsave
  x86/xen: use capabilities instead of fake cpuid values for x2apic
  x86/xen: use capabilities instead of fake cpuid values for mwait
  x86/xen: use capabilities instead of fake cpuid values for acpi
  x86/xen: use capabilities instead of fake cpuid values for acc
  x86/xen: use capabilities instead of fake cpuid values for mtrr
  x86/xen: use capabilities instead of fake cpuid values for aperf
  ...
parents a1be8edd 3dbd8204
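The "use capabilities instead of fake cpuid values" commits in the shortlog replace Xen's old per-CPU cpuid filtering with boot-time capability masking. A condensed sketch of that pattern (the helper name and exact feature list are illustrative of the commit subjects above, not a quoted hunk; setup_clear_cpu_cap()/setup_force_cpu_cap() are the stock x86 capability APIs, and bits set or cleared through them propagate to every CPU brought up later):

static void __init xen_init_capabilities(void)
{
	/* Features Xen never exposes to PV guests: clear them once at boot
	 * instead of faking the cpuid leaves that advertise them. */
	setup_clear_cpu_cap(X86_FEATURE_MTRR);
	setup_clear_cpu_cap(X86_FEATURE_ACC);
	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);

	if (!xen_initial_domain())
		setup_clear_cpu_cap(X86_FEATURE_ACPI);	/* ACPI is dom0-only */

	if (xen_check_mwait())			/* hypervisor-dependent */
		setup_force_cpu_cap(X86_FEATURE_MWAIT);
	else
		setup_clear_cpu_cap(X86_FEATURE_MWAIT);
}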
@@ -15,6 +15,9 @@ struct dev_archdata {
 #endif
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 	struct dma_iommu_mapping *mapping;
+#endif
+#ifdef CONFIG_XEN
+	const struct dma_map_ops *dev_dma_ops;
 #endif
 	bool dma_coherent;
 };
...
@@ -16,19 +16,9 @@
 extern const struct dma_map_ops arm_dma_ops;
 extern const struct dma_map_ops arm_coherent_dma_ops;
 
-static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
-{
-	if (dev && dev->dma_ops)
-		return dev->dma_ops;
-	return &arm_dma_ops;
-}
-
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	if (xen_initial_domain())
-		return xen_dma_ops;
-	else
-		return __generic_dma_ops(NULL);
+	return &arm_dma_ops;
 }
 
 #define HAVE_ARCH_DMA_SUPPORTED 1
...
@@ -2414,6 +2414,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		dma_ops = arm_get_dma_map_ops(coherent);
 
 	set_dma_ops(dev, dma_ops);
+
+#ifdef CONFIG_XEN
+	if (xen_initial_domain()) {
+		dev->archdata.dev_dma_ops = dev->dma_ops;
+		dev->dma_ops = xen_dma_ops;
+	}
+#endif
 }
 
 void arch_teardown_dma_ops(struct device *dev)
...
@@ -35,6 +35,6 @@ void __init xen_efi_runtime_setup(void)
 	efi.update_capsule = xen_efi_update_capsule;
 	efi.query_capsule_caps = xen_efi_query_capsule_caps;
 	efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
-	efi.reset_system = NULL; /* Functionality provided by Xen. */
+	efi.reset_system = xen_efi_reset_system;
 }
 EXPORT_SYMBOL_GPL(xen_efi_runtime_setup);
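xen_efi_reset_system itself is introduced by the "xen: Implement EFI reset_system callback" commit, which is not expanded in this view; a sketch of its shape, assuming it is built on the xen_reboot() helper exported in the next hunk:

void xen_efi_reset_system(int reset_type, efi_status_t status,
			  unsigned long data_size, efi_char16_t *data)
{
	switch (reset_type) {
	case EFI_RESET_COLD:
	case EFI_RESET_WARM:
		xen_reboot(SHUTDOWN_reboot);	/* SCHEDOP_shutdown hypercall */
		break;
	case EFI_RESET_SHUTDOWN:
		xen_reboot(SHUTDOWN_poweroff);
		break;
	default:
		BUG();
	}
}

Routing EFI resets through the shutdown hypercall keeps the guest from touching firmware runtime services that only Xen is allowed to call.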
@@ -191,20 +191,24 @@ static int xen_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
+void xen_reboot(int reason)
 {
-	struct sched_shutdown r = { .reason = SHUTDOWN_reboot };
+	struct sched_shutdown r = { .reason = reason };
 	int rc;
 
 	rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
 	BUG_ON(rc);
 }
 
+static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
+{
+	xen_reboot(SHUTDOWN_reboot);
+}
+
 static void xen_power_off(void)
 {
-	struct sched_shutdown r = { .reason = SHUTDOWN_poweroff };
-	int rc;
-
-	rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
-	BUG_ON(rc);
+	xen_reboot(SHUTDOWN_poweroff);
 }
 
 static irqreturn_t xen_arm_callback(int irq, void *arg)
...
@@ -19,6 +19,9 @@
 struct dev_archdata {
 #ifdef CONFIG_IOMMU_API
 	void *iommu;			/* private IOMMU data */
+#endif
+#ifdef CONFIG_XEN
+	const struct dma_map_ops *dev_dma_ops;
 #endif
 	bool dma_coherent;
 };
...
@@ -27,11 +27,8 @@
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 extern const struct dma_map_ops dummy_dma_ops;
 
-static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	if (dev && dev->dma_ops)
-		return dev->dma_ops;
-
 	/*
 	 * We expect no ISA devices, and all other DMA masters are expected to
 	 * have someone call arch_setup_dma_ops at device creation time.
@@ -39,14 +36,6 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
 	return &dummy_dma_ops;
 }
 
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	if (xen_initial_domain())
-		return xen_dma_ops;
-	else
-		return __generic_dma_ops(NULL);
-}
-
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
...
@@ -977,4 +977,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	dev->archdata.dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
+
+#ifdef CONFIG_XEN
+	if (xen_initial_domain()) {
+		dev->archdata.dev_dma_ops = dev->dma_ops;
+		dev->dma_ops = xen_dma_ops;
+	}
+#endif
 }
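The dev_dma_ops pointer stashed by the two arch_setup_dma_ops() hunks above is consumed by the helper renamed in "xen/arm,arm64: rename __generic_dma_ops to xen_get_dma_ops"; roughly, a sketch under that assumption (dev->dma_ops itself now points at xen_dma_ops, so Xen's swiotlb wrappers need the saved native ops to do the real cache maintenance):

static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
	if (dev && dev->archdata.dev_dma_ops)
		return dev->archdata.dev_dma_ops;	/* native ops saved at setup */
	return get_arch_dma_ops(NULL);
}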
@@ -35,9 +35,6 @@ struct hypervisor_x86 {
 	/* Detection routine */
 	uint32_t	(*detect)(void);
 
-	/* Adjust CPU feature bits (run once per CPU) */
-	void		(*set_cpu_features)(struct cpuinfo_x86 *);
-
 	/* Platform setup (run once per boot) */
 	void		(*init_platform)(void);
 
@@ -53,15 +50,14 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
-extern void init_hypervisor(struct cpuinfo_x86 *c);
 extern void init_hypervisor_platform(void);
 extern bool hypervisor_x2apic_available(void);
 extern void hypervisor_pin_vcpu(int cpu);
 #else
-static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
 static inline void init_hypervisor_platform(void) { }
 static inline bool hypervisor_x2apic_available(void) { return false; }
 #endif /* CONFIG_HYPERVISOR_GUEST */
...
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 /* No need for a barrier -- XCHG is a barrier on x86. */
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+extern int xen_have_vector_callback;
+
+/*
+ * Events delivered via platform PCI interrupts are always
+ * routed to vcpu 0 and hence cannot be rebound.
+ */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return (!xen_hvm_domain() || xen_have_vector_callback);
+}
+
 #endif /* _ASM_X86_XEN_EVENTS_H */
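A hypothetical caller of the new predicate (the function below is illustrative, not part of this series): the event-channel core can use it to refuse vcpu migration when events arrive through the platform-PCI interrupt, which is hardwired to vcpu 0:

static int rebind_evtchn_checked(unsigned int evtchn, unsigned int tcpu)
{
	if (!xen_support_evtchn_rebind())
		return -EOPNOTSUPP;
	/* ... issue EVTCHNOP_bind_vcpu for (evtchn, tcpu) ... */
	return 0;
}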
@@ -52,12 +52,30 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 						    unsigned long pfn_e);
 
+#ifdef CONFIG_XEN_PV
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 				     struct gnttab_unmap_grant_ref *kunmap_ops,
 				     struct page **pages, unsigned int count);
+#else
+static inline int
+set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+			struct gnttab_map_grant_ref *kmap_ops,
+			struct page **pages, unsigned int count)
+{
+	return 0;
+}
+
+static inline int
+clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+			  struct gnttab_unmap_grant_ref *kunmap_ops,
+			  struct page **pages, unsigned int count)
+{
+	return 0;
+}
+#endif
 
 /*
  * Helper functions to write or read unsigned long values to/from
@@ -73,6 +91,7 @@ static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
 	return __get_user(*val, (unsigned long __user *)addr);
 }
 
+#ifdef CONFIG_XEN_PV
 /*
  * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
  * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
@@ -99,6 +118,12 @@ static inline unsigned long __pfn_to_mfn(unsigned long pfn)
 
 	return mfn;
 }
+#else
+static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+	return pfn;
+}
+#endif
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
...
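What the !CONFIG_XEN_PV stubs buy callers, in one line: on HVM/PVH guests the p2m is an identity map, so grant-table and balloon code that translates frame numbers compiles and runs unchanged:

	unsigned long mfn = pfn_to_mfn(pfn);	/* == pfn when CONFIG_XEN_PV=n */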
@@ -1149,7 +1149,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
-	init_hypervisor(c);
 	x86_init_rdrand(c);
 	x86_init_cache_qos(c);
 	setup_pku(c);
...
@@ -28,8 +28,11 @@
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN
-	&x86_hyper_xen,
+#ifdef CONFIG_XEN_PV
+	&x86_hyper_xen_pv,
+#endif
+#ifdef CONFIG_XEN_PVHVM
+	&x86_hyper_xen_hvm,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
@@ -60,12 +63,6 @@ detect_hypervisor_vendor(void)
 	pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
-void init_hypervisor(struct cpuinfo_x86 *c)
-{
-	if (x86_hyper && x86_hyper->set_cpu_features)
-		x86_hyper->set_cpu_features(c);
-}
-
 void __init init_hypervisor_platform(void)
 {
 
@@ -74,8 +71,6 @@ void __init init_hypervisor_platform(void)
 	if (!x86_hyper)
 		return;
 
-	init_hypervisor(&boot_cpu_data);
-
 	if (x86_hyper->init_platform)
 		x86_hyper->init_platform();
 }
...
@@ -113,6 +113,24 @@ static void __init vmware_paravirt_ops_setup(void)
 #define vmware_paravirt_ops_setup() do {} while (0)
 #endif
 
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+static void __init vmware_set_capabilities(void)
+{
+	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
+	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+}
+
 static void __init vmware_platform_setup(void)
 {
 	uint32_t eax, ebx, ecx, edx;
@@ -152,6 +170,8 @@ static void __init vmware_platform_setup(void)
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
 #endif
+
+	vmware_set_capabilities();
 }
 
 /*
@@ -176,24 +196,6 @@ static uint32_t __init vmware_platform(void)
 	return 0;
 }
 
-/*
- * VMware hypervisor takes care of exporting a reliable TSC to the guest.
- * Still, due to timing difference when running on virtual cpus, the TSC can
- * be marked as unstable in some cases. For example, the TSC sync check at
- * bootup can fail due to a marginal offset between vcpus' TSCs (though the
- * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
- * is not suitable as a watchdog when running on a hypervisor because the
- * kernel may miss a wrap of the counter if the vcpu is descheduled for a
- * long time. To skip these checks at runtime we set these capability bits,
- * so that the kernel could just trust the hypervisor with providing a
- * reliable virtual TSC that is suitable for timekeeping.
- */
-static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
-{
-	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-	set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
-}
-
 /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
 static bool __init vmware_legacy_x2apic_available(void)
 {
@@ -206,7 +208,6 @@ static bool __init vmware_legacy_x2apic_available(void)
 const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.name			= "VMware",
 	.detect			= vmware_platform,
-	.set_cpu_features	= vmware_set_cpu_features,
 	.init_platform		= vmware_platform_setup,
 	.x2apic_available	= vmware_legacy_x2apic_available,
 };
...
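The switch from set_cpu_cap() to setup_force_cpu_cap() is what makes the per-CPU set_cpu_features hook redundant: a forced capability lands in boot_cpu_data and in the global cpu_caps_set mask, which identify_cpu() re-applies to every CPU it brings up, so one call at platform-init time covers them all. Paraphrasing the helper from <asm/cpufeature.h> of this era:

#define setup_force_cpu_cap(bit) do {			\
	set_cpu_cap(&boot_cpu_data, bit);		\
	set_bit(bit, (unsigned long *)cpu_caps_set);	\
} while (0)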
@@ -446,7 +446,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 	/*
 	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
 	 * current_pt_regs()->flags may not match the current task's
...
@@ -447,7 +447,7 @@ void __init xen_msi_init(void)
 
 int __init pci_xen_hvm_init(void)
 {
-	if (!xen_feature(XENFEAT_hvm_pirqs))
+	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
 		return 0;
 
 #ifdef CONFIG_ACPI
...
@@ -6,8 +6,6 @@ config XEN
 	bool "Xen guest support"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
-	select XEN_HAVE_PVMMU
-	select XEN_HAVE_VPMU
 	depends on X86_64 || (X86_32 && X86_PAE)
 	depends on X86_LOCAL_APIC && X86_TSC
 	help
@@ -15,18 +13,41 @@ config XEN
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
 
-config XEN_DOM0
+config XEN_PV
+	bool "Xen PV guest support"
+	default y
+	depends on XEN
+	select XEN_HAVE_PVMMU
+	select XEN_HAVE_VPMU
+	help
+	  Support running as a Xen PV guest.
+
+config XEN_PV_SMP
 	def_bool y
-	depends on XEN && PCI_XEN && SWIOTLB_XEN
+	depends on XEN_PV && SMP
+
+config XEN_DOM0
+	bool "Xen PV Dom0 support"
+	default y
+	depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
 	depends on X86_IO_APIC && ACPI && PCI
+	help
+	  Support running as a Xen PV Dom0 guest.
 
 config XEN_PVHVM
-	def_bool y
+	bool "Xen PVHVM guest support"
+	default y
 	depends on XEN && PCI && X86_LOCAL_APIC
+	help
+	  Support running as a Xen PVHVM guest.
+
+config XEN_PVHVM_SMP
+	def_bool y
+	depends on XEN_PVHVM && SMP
 
 config XEN_512GB
 	bool "Limit Xen pv-domain memory to 512GB"
-	depends on XEN && X86_64
+	depends on XEN_PV && X86_64
 	default y
 	help
 	  Limit paravirtualized user domains to 512GB of RAM.
...
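With this split, the headline feature of the pull — a Xen-capable kernel with no paravirtualized guest support — becomes a plain configuration choice. A hypothetical .config fragment:

CONFIG_XEN=y
# CONFIG_XEN_PV is not set
CONFIG_XEN_PVHVM=y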
@@ -7,17 +7,23 @@ endif
 
 # Make sure early boot has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_enlighten.o		:= $(nostackp)
-CFLAGS_mmu.o			:= $(nostackp)
+CFLAGS_enlighten_pv.o		:= $(nostackp)
+CFLAGS_mmu_pv.o			:= $(nostackp)
 
-obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
+obj-y		:= enlighten.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o apic.o pmu.o
+			grant-table.o suspend.o platform-pci-unplug.o
+
+obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o mmu_hvm.o suspend_hvm.o
+obj-$(CONFIG_XEN_PV)		+= setup.o apic.o pmu.o suspend_pv.o \
+				p2m.o enlighten_pv.o mmu_pv.o
+obj-$(CONFIG_XEN_PVH)		+= enlighten_pvh.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_XEN_PV_SMP)	+= smp_pv.o
+obj-$(CONFIG_XEN_PVHVM_SMP)	+= smp_hvm.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_XEN_DOM0)		+= vga.o
...
@@ -81,7 +81,7 @@ static const struct efi efi_xen __initconst = {
 	.update_capsule = xen_efi_update_capsule,
 	.query_capsule_caps = xen_efi_query_capsule_caps,
 	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
-	.reset_system = NULL, /* Functionality provided by Xen. */
+	.reset_system = xen_efi_reset_system,
 	.set_virtual_address_map = NULL, /* Not used under Xen. */
 	.flags = 0 /* Initialized later. */
 };
...
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
static struct shared_info *shared_info_page;
if (!shared_info_page)
shared_info_page = (struct shared_info *)
extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
* page, we use it in the event channel upcall and in some pvclock
* related functions. We don't need the vcpu_info placement
* optimizations because we don't use any pv_mmu or pv_irq op on
* HVM.
* When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
* online but xen_hvm_init_shared_info is run at resume time too and
* in that case multiple vcpus might be online. */
for_each_online_cpu(cpu) {
/* Leave it to be NULL. */
if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
continue;
per_cpu(xen_vcpu, cpu) =
&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
}
}
static void __init init_hvm_pv_info(void)
{
int major, minor;
uint32_t eax, ebx, ecx, edx, base;
base = xen_cpuid_base();
eax = cpuid_eax(base + 1);
major = eax >> 16;
minor = eax & 0xffff;
printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
xen_domain_type = XEN_HVM_DOMAIN;
/* PVH set up hypercall page in xen_prepare_pvh(). */
if (xen_pvh_domain())
pv_info.name = "Xen PVH";
else {
u64 pfn;
uint32_t msr;
pv_info.name = "Xen HVM";
msr = cpuid_ebx(base + 2);
pfn = __pa(hypercall_page);
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
}
xen_setup_features();
cpuid(base + 4, &eax, &ebx, &ecx, &edx);
if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
this_cpu_write(xen_vcpu_id, ebx);
else
this_cpu_write(xen_vcpu_id, smp_processor_id());
}
#ifdef CONFIG_KEXEC_CORE
static void xen_hvm_shutdown(void)
{
native_machine_shutdown();
if (kexec_in_progress)
xen_reboot(SHUTDOWN_soft_reset);
}
static void xen_hvm_crash_shutdown(struct pt_regs *regs)
{
native_machine_crash_shutdown(regs);
xen_reboot(SHUTDOWN_soft_reset);
}
#endif
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
int rc;
/*
* This can happen if CPU was offlined earlier and
* offlining timed out in common_cpu_die().
*/
if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
}
if (cpu_acpi_id(cpu) != U32_MAX)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
rc = xen_smp_intr_init(cpu);
if (rc) {
WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
cpu, rc);
return rc;
}
return 0;
}
static int xen_cpu_dead_hvm(unsigned int cpu)
{
xen_smp_intr_free(cpu);
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
return 0;
}
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
return;
init_hvm_pv_info();
xen_hvm_init_shared_info();
xen_panic_handler_init();
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
xen_hvm_smp_init();
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
xen_unplug_emulated_devices();
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
if (xen_pvh_domain())
machine_ops.emergency_restart = xen_emergency_restart;
#ifdef CONFIG_KEXEC_CORE
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
#endif
}
static bool xen_nopv;
static __init int xen_parse_nopv(char *arg)
{
xen_nopv = true;
return 0;
}
early_param("xen_nopv", xen_parse_nopv);
bool xen_hvm_need_lapic(void)
{
if (xen_nopv)
return false;
if (xen_pv_domain())
return false;
if (!xen_hvm_domain())
return false;
if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
return false;
return true;
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static uint32_t __init xen_platform_hvm(void)
{
if (xen_pv_domain() || xen_nopv)
return 0;
return xen_cpuid_base();
}
const struct hypervisor_x86 x86_hyper_xen_hvm = {
.name = "Xen HVM",
.detect = xen_platform_hvm,
.init_platform = xen_hvm_guest_init,
.pin_vcpu = xen_pin_vcpu,
.x2apic_available = xen_x2apic_para_available,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
#include <linux/acpi.h>
#include <xen/hvc-console.h>
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/start_info.h>
/*
* PVH variables.
*
* xen_pvh and pvh_bootparams need to live in data segment since they
* are used after startup_{32|64}, which clear .bss, are invoked.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data")));
struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
static void xen_pvh_arch_setup(void)
{
/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
if (nr_ioapics == 0)
acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
}
static void __init init_pvh_bootparams(void)
{
struct xen_memory_map memmap;
int rc;
memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
if (rc) {
xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
BUG();
}
pvh_bootparams.e820_entries = memmap.nr_entries;
if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
ISA_START_ADDRESS;
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size =
ISA_END_ADDRESS - ISA_START_ADDRESS;
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
E820_TYPE_RESERVED;
pvh_bootparams.e820_entries++;
} else
xen_raw_printk("Warning: Can fit ISA range into e820\n");
pvh_bootparams.hdr.cmd_line_ptr =
pvh_start_info.cmdline_paddr;
/* The first module is always ramdisk. */
if (pvh_start_info.nr_modules) {
struct hvm_modlist_entry *modaddr =
__va(pvh_start_info.modlist_paddr);
pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
pvh_bootparams.hdr.ramdisk_size = modaddr->size;
}
/*
* See Documentation/x86/boot.txt.
*
* Version 2.12 supports Xen entry point but we will use default x86/PC
* environment (i.e. hardware_subarch 0).
*/
pvh_bootparams.hdr.version = 0x212;
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
}
/*
* This routine (and those that it might call) should not use
* anything that lives in .bss since that segment will be cleared later.
*/
void __init xen_prepare_pvh(void)
{
u32 msr;
u64 pfn;
if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
pvh_start_info.magic);
BUG();
}
xen_pvh = 1;
msr = cpuid_ebx(xen_cpuid_base() + 2);
pfn = __pa(hypercall_page);
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
init_pvh_bootparams();
x86_init.oem.arch_setup = xen_pvh_arch_setup;
}
#include <linux/types.h>
#include <linux/crash_dump.h>
#include <xen/interface/xen.h>
#include <xen/hvm.h>
#include "mmu.h"
#ifdef CONFIG_PROC_VMCORE
/*
* This function is used in two contexts:
* - the kdump kernel has to check whether a pfn of the crashed kernel
* was a ballooned page. vmcore is using this function to decide
* whether to access a pfn of the crashed kernel.
* - the kexec kernel has to check whether a pfn was ballooned by the
* previous kernel. If the pfn is ballooned, handle it properly.
* Returns 0 if the pfn is not backed by a RAM page, the caller may
* handle the pfn special in this case.
*/
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
{
struct xen_hvm_get_mem_type a = {
.domid = DOMID_SELF,
.pfn = pfn,
};
int ram;
if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
return -ENXIO;
switch (a.mem_type) {
case HVMMEM_mmio_dm:
ram = 0;
break;
case HVMMEM_ram_rw:
case HVMMEM_ram_ro:
default:
ram = 1;
break;
}
return ram;
}
#endif
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
struct xen_hvm_pagetable_dying a;
int rc;
a.domid = DOMID_SELF;
a.gpa = __pa(mm->pgd);
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
WARN_ON_ONCE(rc < 0);
}
static int is_pagetable_dying_supported(void)
{
struct xen_hvm_pagetable_dying a;
int rc = 0;
a.domid = DOMID_SELF;
a.gpa = 0x00;
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
if (rc < 0) {
printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
return 0;
}
return 1;
}
void __init xen_hvm_init_mmu_ops(void)
{
if (is_pagetable_dying_supported())
pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
#endif
}
@@ -4,8 +4,13 @@
 #include <xen/interface/xenpmu.h>
 
 irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+#ifdef CONFIG_XEN_HAVE_VPMU
 void xen_pmu_init(int cpu);
 void xen_pmu_finish(int cpu);
+#else
+static inline void xen_pmu_init(int cpu) {}
+static inline void xen_pmu_finish(int cpu) {}
+#endif
 bool is_xen_pmu(int cpu);
 bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
 bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
...
@@ -11,7 +11,17 @@ extern void xen_send_IPI_self(int vector);
 extern int xen_smp_intr_init(unsigned int cpu);
 extern void xen_smp_intr_free(unsigned int cpu);
+int xen_smp_intr_init_pv(unsigned int cpu);
+void xen_smp_intr_free_pv(unsigned int cpu);
 
+void xen_smp_send_reschedule(int cpu);
+void xen_smp_send_call_function_ipi(const struct cpumask *mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
+
+struct xen_common_irq {
+	int irq;
+	char *name;
+};
 #else /* CONFIG_SMP */
 
 static inline int xen_smp_intr_init(unsigned int cpu)
@@ -19,6 +29,12 @@ static inline int xen_smp_intr_init(unsigned int cpu)
 	return 0;
 }
 static inline void xen_smp_intr_free(unsigned int cpu) {}
+
+static inline int xen_smp_intr_init_pv(unsigned int cpu)
+{
+	return 0;
+}
+static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
 #endif /* CONFIG_SMP */
 
 #endif
#include <linux/types.h>
#include <xen/xen.h>
#include <xen/features.h>
#include <xen/interface/features.h>
#include "xen-ops.h"
void xen_hvm_post_suspend(int suspend_cancelled)
{
int cpu;
if (!suspend_cancelled)
xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
for_each_online_cpu(cpu) {
xen_setup_runstate_info(cpu);
}
}
}
@@ -436,6 +436,14 @@ static void xen_hvm_setup_cpu_clockevents(void)
 
 void __init xen_hvm_init_time_ops(void)
 {
+	/*
+	 * vector callback is needed otherwise we cannot receive interrupts
+	 * on cpu > 0 and at this point we don't know how many cpus are
+	 * available.
+	 */
+	if (!xen_have_vector_callback)
+		return;
+
 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
 		printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
 		       "disable pv timer\n");
...
@@ -434,7 +434,7 @@ static int map_data_for_request(struct vscsifrnt_info *info,
 
 	if (seg_grants) {
 		page = virt_to_page(seg);
-		off = (unsigned long)seg & ~PAGE_MASK;
+		off = offset_in_page(seg);
 		len = sizeof(struct scsiif_request_segment) * data_grants;
 		while (len > 0) {
 			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
...
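The replacement is purely cosmetic: offset_in_page() from <linux/mm.h> expands to exactly the expression it replaces, so behavior is unchanged:

#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)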