Commit a9648072 authored by Linus Torvalds

Merge tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:
 "Xen fixes and featrues for 4.12. The main changes are:

   - enable building the kernel with Xen support but without enabling
     paravirtualized mode (Vitaly Kuznetsov)

   - add a new 9pfs xen frontend driver (Stefano Stabellini)

   - simplify Xen's cpuid handling by making use of cpu capabilities
     (Juergen Gross)

   - add/modify some headers for new Xen paravirtualized devices
     (Oleksandr Andrushchenko)

   - EFI reset_system support under Xen (Julien Grall)

   - and the usual cleanups and corrections"

* tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (57 commits)
  xen: Move xen_have_vector_callback definition to enlighten.c
  xen: Implement EFI reset_system callback
  arm/xen: Consolidate calls to shutdown hypercall in a single helper
  xen: Export xen_reboot
  xen/x86: Call xen_smp_intr_init_pv() on BSP
  xen: Revert commits da72ff5b and 72a9b186
  xen/pvh: Do not fill kernel's e820 map in init_pvh_bootparams()
  xen/scsifront: use offset_in_page() macro
  xen/arm,arm64: rename __generic_dma_ops to xen_get_dma_ops
  xen/arm,arm64: fix xen_dma_ops after 815dd187 "Consolidate get_dma_ops..."
  xen/9pfs: select CONFIG_XEN_XENBUS_FRONTEND
  x86/cpu: remove hypervisor specific set_cpu_features
  vmware: set cpu capabilities during platform initialization
  x86/xen: use capabilities instead of fake cpuid values for xsave
  x86/xen: use capabilities instead of fake cpuid values for x2apic
  x86/xen: use capabilities instead of fake cpuid values for mwait
  x86/xen: use capabilities instead of fake cpuid values for acpi
  x86/xen: use capabilities instead of fake cpuid values for acc
  x86/xen: use capabilities instead of fake cpuid values for mtrr
  x86/xen: use capabilities instead of fake cpuid values for aperf
  ...
parents a1be8edd 3dbd8204
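The "use capabilities instead of fake cpuid values" commits in the shortlog replace Xen's old per-CPU cpuid filtering with boot-time capability masking. A condensed sketch of that pattern (the helper name and exact feature list are illustrative of the commit subjects above, not a quoted hunk; setup_clear_cpu_cap()/setup_force_cpu_cap() are the stock x86 capability APIs, and bits set or cleared through them propagate to every CPU brought up later):

static void __init xen_init_capabilities(void)
{
	/* Features Xen never exposes to PV guests: clear them once at boot
	 * instead of faking the cpuid leaves that advertise them. */
	setup_clear_cpu_cap(X86_FEATURE_MTRR);
	setup_clear_cpu_cap(X86_FEATURE_ACC);
	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);

	if (!xen_initial_domain())
		setup_clear_cpu_cap(X86_FEATURE_ACPI);	/* ACPI is dom0-only */

	if (xen_check_mwait())			/* hypervisor-dependent */
		setup_force_cpu_cap(X86_FEATURE_MWAIT);
	else
		setup_clear_cpu_cap(X86_FEATURE_MWAIT);
}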
@@ -15,6 +15,9 @@ struct dev_archdata {
 #endif
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 	struct dma_iommu_mapping *mapping;
+#endif
+#ifdef CONFIG_XEN
+	const struct dma_map_ops *dev_dma_ops;
 #endif
 	bool dma_coherent;
 };
...
@@ -16,19 +16,9 @@
 extern const struct dma_map_ops arm_dma_ops;
 extern const struct dma_map_ops arm_coherent_dma_ops;
 
-static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
-{
-	if (dev && dev->dma_ops)
-		return dev->dma_ops;
-	return &arm_dma_ops;
-}
-
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	if (xen_initial_domain())
-		return xen_dma_ops;
-	else
-		return __generic_dma_ops(NULL);
+	return &arm_dma_ops;
 }
 
 #define HAVE_ARCH_DMA_SUPPORTED 1
...
@@ -2414,6 +2414,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		dma_ops = arm_get_dma_map_ops(coherent);
 
 	set_dma_ops(dev, dma_ops);
+
+#ifdef CONFIG_XEN
+	if (xen_initial_domain()) {
+		dev->archdata.dev_dma_ops = dev->dma_ops;
+		dev->dma_ops = xen_dma_ops;
+	}
+#endif
 }
 
 void arch_teardown_dma_ops(struct device *dev)
...
@@ -35,6 +35,6 @@ void __init xen_efi_runtime_setup(void)
 	efi.update_capsule = xen_efi_update_capsule;
 	efi.query_capsule_caps = xen_efi_query_capsule_caps;
 	efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
-	efi.reset_system = NULL; /* Functionality provided by Xen. */
+	efi.reset_system = xen_efi_reset_system;
 }
 EXPORT_SYMBOL_GPL(xen_efi_runtime_setup);
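xen_efi_reset_system itself is introduced by the "xen: Implement EFI reset_system callback" commit, which is not expanded in this view; a sketch of its shape, assuming it is built on the xen_reboot() helper exported in the next hunk:

void xen_efi_reset_system(int reset_type, efi_status_t status,
			  unsigned long data_size, efi_char16_t *data)
{
	switch (reset_type) {
	case EFI_RESET_COLD:
	case EFI_RESET_WARM:
		xen_reboot(SHUTDOWN_reboot);	/* SCHEDOP_shutdown hypercall */
		break;
	case EFI_RESET_SHUTDOWN:
		xen_reboot(SHUTDOWN_poweroff);
		break;
	default:
		BUG();
	}
}

Routing EFI resets through the shutdown hypercall keeps the guest from touching firmware runtime services that only Xen is allowed to call.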
@@ -191,20 +191,24 @@ static int xen_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
-static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
+void xen_reboot(int reason)
 {
-	struct sched_shutdown r = { .reason = SHUTDOWN_reboot };
+	struct sched_shutdown r = { .reason = reason };
 	int rc;
 
 	rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
 	BUG_ON(rc);
 }
 
+static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
+{
+	xen_reboot(SHUTDOWN_reboot);
+}
+
 static void xen_power_off(void)
 {
-	struct sched_shutdown r = { .reason = SHUTDOWN_poweroff };
-	int rc;
-
-	rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
-	BUG_ON(rc);
+	xen_reboot(SHUTDOWN_poweroff);
 }
 
 static irqreturn_t xen_arm_callback(int irq, void *arg)
...
@@ -19,6 +19,9 @@
 struct dev_archdata {
 #ifdef CONFIG_IOMMU_API
 	void *iommu;			/* private IOMMU data */
+#endif
+#ifdef CONFIG_XEN
+	const struct dma_map_ops *dev_dma_ops;
 #endif
 	bool dma_coherent;
 };
...
@@ -27,11 +27,8 @@
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 extern const struct dma_map_ops dummy_dma_ops;
 
-static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	if (dev && dev->dma_ops)
-		return dev->dma_ops;
-
 	/*
 	 * We expect no ISA devices, and all other DMA masters are expected to
 	 * have someone call arch_setup_dma_ops at device creation time.
@@ -39,14 +36,6 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
 	return &dummy_dma_ops;
 }
 
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	if (xen_initial_domain())
-		return xen_dma_ops;
-	else
-		return __generic_dma_ops(NULL);
-}
-
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
...
@@ -977,4 +977,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	dev->archdata.dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
+
+#ifdef CONFIG_XEN
+	if (xen_initial_domain()) {
+		dev->archdata.dev_dma_ops = dev->dma_ops;
+		dev->dma_ops = xen_dma_ops;
+	}
+#endif
 }
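The dev_dma_ops pointer stashed by the two arch_setup_dma_ops() hunks above is consumed by the helper renamed in "xen/arm,arm64: rename __generic_dma_ops to xen_get_dma_ops"; roughly, a sketch under that assumption (dev->dma_ops itself now points at xen_dma_ops, so Xen's swiotlb wrappers need the saved native ops to do the real cache maintenance):

static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
	if (dev && dev->archdata.dev_dma_ops)
		return dev->archdata.dev_dma_ops;	/* native ops saved at setup */
	return get_arch_dma_ops(NULL);
}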
@@ -35,9 +35,6 @@ struct hypervisor_x86 {
 	/* Detection routine */
 	uint32_t	(*detect)(void);
 
-	/* Adjust CPU feature bits (run once per CPU) */
-	void		(*set_cpu_features)(struct cpuinfo_x86 *);
-
 	/* Platform setup (run once per boot) */
 	void		(*init_platform)(void);
 
@@ -53,15 +50,14 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
-extern void init_hypervisor(struct cpuinfo_x86 *c);
 extern void init_hypervisor_platform(void);
 extern bool hypervisor_x2apic_available(void);
 extern void hypervisor_pin_vcpu(int cpu);
 #else
-static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
 static inline void init_hypervisor_platform(void) { }
 static inline bool hypervisor_x2apic_available(void) { return false; }
 #endif /* CONFIG_HYPERVISOR_GUEST */
...
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
 /* No need for a barrier -- XCHG is a barrier on x86. */
 #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
 
+extern int xen_have_vector_callback;
+
+/*
+ * Events delivered via platform PCI interrupts are always
+ * routed to vcpu 0 and hence cannot be rebound.
+ */
+static inline bool xen_support_evtchn_rebind(void)
+{
+	return (!xen_hvm_domain() || xen_have_vector_callback);
+}
+
 #endif /* _ASM_X86_XEN_EVENTS_H */
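A hypothetical caller of the new predicate (the function below is illustrative, not part of this series): the event-channel core can use it to refuse vcpu migration when events arrive through the platform-PCI interrupt, which is hardwired to vcpu 0:

static int rebind_evtchn_checked(unsigned int evtchn, unsigned int tcpu)
{
	if (!xen_support_evtchn_rebind())
		return -EOPNOTSUPP;
	/* ... issue EVTCHNOP_bind_vcpu for (evtchn, tcpu) ... */
	return 0;
}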
@@ -52,12 +52,30 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 						    unsigned long pfn_e);
 
+#ifdef CONFIG_XEN_PV
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 				     struct gnttab_unmap_grant_ref *kunmap_ops,
 				     struct page **pages, unsigned int count);
+#else
+static inline int
+set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+			struct gnttab_map_grant_ref *kmap_ops,
+			struct page **pages, unsigned int count)
+{
+	return 0;
+}
+
+static inline int
+clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+			  struct gnttab_unmap_grant_ref *kunmap_ops,
+			  struct page **pages, unsigned int count)
+{
+	return 0;
+}
+#endif
 
 /*
  * Helper functions to write or read unsigned long values to/from
@@ -73,6 +91,7 @@ static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
 	return __get_user(*val, (unsigned long __user *)addr);
 }
 
+#ifdef CONFIG_XEN_PV
 /*
  * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
  * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
@@ -99,6 +118,12 @@ static inline unsigned long __pfn_to_mfn(unsigned long pfn)
 
 	return mfn;
 }
+#else
+static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+	return pfn;
+}
+#endif
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
...
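What the !CONFIG_XEN_PV stubs buy callers, in one line: on HVM/PVH guests the p2m is an identity map, so grant-table and balloon code that translates frame numbers compiles and runs unchanged:

	unsigned long mfn = pfn_to_mfn(pfn);	/* == pfn when CONFIG_XEN_PV=n */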
@@ -1149,7 +1149,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
-	init_hypervisor(c);
 	x86_init_rdrand(c);
 	x86_init_cache_qos(c);
 	setup_pku(c);
...
@@ -28,8 +28,11 @@
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN
-	&x86_hyper_xen,
+#ifdef CONFIG_XEN_PV
+	&x86_hyper_xen_pv,
+#endif
+#ifdef CONFIG_XEN_PVHVM
+	&x86_hyper_xen_hvm,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
@@ -60,12 +63,6 @@ detect_hypervisor_vendor(void)
 	pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
-void init_hypervisor(struct cpuinfo_x86 *c)
-{
-	if (x86_hyper && x86_hyper->set_cpu_features)
-		x86_hyper->set_cpu_features(c);
-}
-
 void __init init_hypervisor_platform(void)
 {
 
@@ -74,8 +71,6 @@ void __init init_hypervisor_platform(void)
 	if (!x86_hyper)
 		return;
 
-	init_hypervisor(&boot_cpu_data);
-
 	if (x86_hyper->init_platform)
 		x86_hyper->init_platform();
 }
...
@@ -113,6 +113,24 @@ static void __init vmware_paravirt_ops_setup(void)
 #define vmware_paravirt_ops_setup() do {} while (0)
 #endif
 
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+static void __init vmware_set_capabilities(void)
+{
+	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
+	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+}
+
 static void __init vmware_platform_setup(void)
 {
 	uint32_t eax, ebx, ecx, edx;
@@ -152,6 +170,8 @@ static void __init vmware_platform_setup(void)
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
 #endif
+
+	vmware_set_capabilities();
 }
 
 /*
@@ -176,24 +196,6 @@ static uint32_t __init vmware_platform(void)
 	return 0;
 }
 
-/*
- * VMware hypervisor takes care of exporting a reliable TSC to the guest.
- * Still, due to timing difference when running on virtual cpus, the TSC can
- * be marked as unstable in some cases. For example, the TSC sync check at
- * bootup can fail due to a marginal offset between vcpus' TSCs (though the
- * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
- * is not suitable as a watchdog when running on a hypervisor because the
- * kernel may miss a wrap of the counter if the vcpu is descheduled for a
- * long time. To skip these checks at runtime we set these capability bits,
- * so that the kernel could just trust the hypervisor with providing a
- * reliable virtual TSC that is suitable for timekeeping.
- */
-static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
-{
-	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-	set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
-}
-
 /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
 static bool __init vmware_legacy_x2apic_available(void)
 {
@@ -206,7 +208,6 @@ static bool __init vmware_legacy_x2apic_available(void)
 const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.name			= "VMware",
 	.detect			= vmware_platform,
-	.set_cpu_features	= vmware_set_cpu_features,
 	.init_platform		= vmware_platform_setup,
 	.x2apic_available	= vmware_legacy_x2apic_available,
 };
...
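The switch from set_cpu_cap() to setup_force_cpu_cap() is what makes the per-CPU set_cpu_features hook redundant: a forced capability lands in boot_cpu_data and in the global cpu_caps_set mask, which identify_cpu() re-applies to every CPU it brings up, so one call at platform-init time covers them all. Paraphrasing the helper from <asm/cpufeature.h> of this era:

#define setup_force_cpu_cap(bit) do {			\
	set_cpu_cap(&boot_cpu_data, bit);		\
	set_bit(bit, (unsigned long *)cpu_caps_set);	\
} while (0)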
@@ -446,7 +446,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
 	/*
 	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
 	 * current_pt_regs()->flags may not match the current task's
...
@@ -447,7 +447,7 @@ void __init xen_msi_init(void)
 
 int __init pci_xen_hvm_init(void)
 {
-	if (!xen_feature(XENFEAT_hvm_pirqs))
+	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
 		return 0;
 
 #ifdef CONFIG_ACPI
...
@@ -6,8 +6,6 @@ config XEN
 	bool "Xen guest support"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
-	select XEN_HAVE_PVMMU
-	select XEN_HAVE_VPMU
 	depends on X86_64 || (X86_32 && X86_PAE)
 	depends on X86_LOCAL_APIC && X86_TSC
 	help
@@ -15,18 +13,41 @@ config XEN
 	  kernel to boot in a paravirtualized environment under the
 	  Xen hypervisor.
 
-config XEN_DOM0
+config XEN_PV
+	bool "Xen PV guest support"
+	default y
+	depends on XEN
+	select XEN_HAVE_PVMMU
+	select XEN_HAVE_VPMU
+	help
+	  Support running as a Xen PV guest.
+
+config XEN_PV_SMP
 	def_bool y
-	depends on XEN && PCI_XEN && SWIOTLB_XEN
+	depends on XEN_PV && SMP
+
+config XEN_DOM0
+	bool "Xen PV Dom0 support"
+	default y
+	depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
 	depends on X86_IO_APIC && ACPI && PCI
+	help
+	  Support running as a Xen PV Dom0 guest.
 
 config XEN_PVHVM
-	def_bool y
+	bool "Xen PVHVM guest support"
+	default y
 	depends on XEN && PCI && X86_LOCAL_APIC
+	help
+	  Support running as a Xen PVHVM guest.
+
+config XEN_PVHVM_SMP
+	def_bool y
+	depends on XEN_PVHVM && SMP
 
 config XEN_512GB
 	bool "Limit Xen pv-domain memory to 512GB"
-	depends on XEN && X86_64
+	depends on XEN_PV && X86_64
 	default y
 	help
 	  Limit paravirtualized user domains to 512GB of RAM.
...
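With this split, the headline feature of the pull — a Xen-capable kernel with no paravirtualized guest support — becomes a plain configuration choice. A hypothetical .config fragment:

CONFIG_XEN=y
# CONFIG_XEN_PV is not set
CONFIG_XEN_PVHVM=y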
@@ -7,17 +7,23 @@ endif
 
 # Make sure early boot has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_enlighten.o		:= $(nostackp)
-CFLAGS_mmu.o			:= $(nostackp)
+CFLAGS_enlighten_pv.o		:= $(nostackp)
+CFLAGS_mmu_pv.o			:= $(nostackp)
 
-obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
+obj-y		:= enlighten.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o apic.o pmu.o
+			grant-table.o suspend.o platform-pci-unplug.o
+
+obj-$(CONFIG_XEN_PVHVM)		+= enlighten_hvm.o mmu_hvm.o suspend_hvm.o
+obj-$(CONFIG_XEN_PV)		+= setup.o apic.o pmu.o suspend_pv.o \
+				p2m.o enlighten_pv.o mmu_pv.o
+obj-$(CONFIG_XEN_PVH)		+= enlighten_pvh.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_XEN_PV_SMP)	+= smp_pv.o
+obj-$(CONFIG_XEN_PVHVM_SMP)	+= smp_hvm.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_XEN_DOM0)		+= vga.o
...
@@ -81,7 +81,7 @@ static const struct efi efi_xen __initconst = {
 	.update_capsule = xen_efi_update_capsule,
 	.query_capsule_caps = xen_efi_query_capsule_caps,
 	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
-	.reset_system = NULL, /* Functionality provided by Xen. */
+	.reset_system = xen_efi_reset_system,
 	.set_virtual_address_map = NULL, /* Not used under Xen. */
 	.flags = 0 /* Initialized later. */
 };
...
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
static struct shared_info *shared_info_page;
if (!shared_info_page)
shared_info_page = (struct shared_info *)
extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
* page, we use it in the event channel upcall and in some pvclock
* related functions. We don't need the vcpu_info placement
* optimizations because we don't use any pv_mmu or pv_irq op on
* HVM.
* When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
* online but xen_hvm_init_shared_info is run at resume time too and
* in that case multiple vcpus might be online. */
for_each_online_cpu(cpu) {
/* Leave it to be NULL. */
if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
continue;
per_cpu(xen_vcpu, cpu) =
&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
}
}
static void __init init_hvm_pv_info(void)
{
int major, minor;
uint32_t eax, ebx, ecx, edx, base;
base = xen_cpuid_base();
eax = cpuid_eax(base + 1);
major = eax >> 16;
minor = eax & 0xffff;
printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
xen_domain_type = XEN_HVM_DOMAIN;
/* PVH set up hypercall page in xen_prepare_pvh(). */
if (xen_pvh_domain())
pv_info.name = "Xen PVH";
else {
u64 pfn;
uint32_t msr;
pv_info.name = "Xen HVM";
msr = cpuid_ebx(base + 2);
pfn = __pa(hypercall_page);
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
}
xen_setup_features();
cpuid(base + 4, &eax, &ebx, &ecx, &edx);
if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
this_cpu_write(xen_vcpu_id, ebx);
else
this_cpu_write(xen_vcpu_id, smp_processor_id());
}
#ifdef CONFIG_KEXEC_CORE
static void xen_hvm_shutdown(void)
{
native_machine_shutdown();
if (kexec_in_progress)
xen_reboot(SHUTDOWN_soft_reset);
}
static void xen_hvm_crash_shutdown(struct pt_regs *regs)
{
native_machine_crash_shutdown(regs);
xen_reboot(SHUTDOWN_soft_reset);
}
#endif
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
int rc;
/*
* This can happen if CPU was offlined earlier and
* offlining timed out in common_cpu_die().
*/
if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
}
if (cpu_acpi_id(cpu) != U32_MAX)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
rc = xen_smp_intr_init(cpu);
if (rc) {
WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
cpu, rc);
return rc;
}
return 0;
}
static int xen_cpu_dead_hvm(unsigned int cpu)
{
xen_smp_intr_free(cpu);
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
return 0;
}
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
return;
init_hvm_pv_info();
xen_hvm_init_shared_info();
xen_panic_handler_init();
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
xen_hvm_smp_init();
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
xen_unplug_emulated_devices();
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
if (xen_pvh_domain())
machine_ops.emergency_restart = xen_emergency_restart;
#ifdef CONFIG_KEXEC_CORE
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
#endif
}
static bool xen_nopv;
static __init int xen_parse_nopv(char *arg)
{
xen_nopv = true;
return 0;
}
early_param("xen_nopv", xen_parse_nopv);
bool xen_hvm_need_lapic(void)
{
if (xen_nopv)
return false;
if (xen_pv_domain())
return false;
if (!xen_hvm_domain())
return false;
if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
return false;
return true;
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static uint32_t __init xen_platform_hvm(void)
{
if (xen_pv_domain() || xen_nopv)
return 0;
return xen_cpuid_base();
}
const struct hypervisor_x86 x86_hyper_xen_hvm = {
.name = "Xen HVM",
.detect = xen_platform_hvm,
.init_platform = xen_hvm_guest_init,
.pin_vcpu = xen_pin_vcpu,
.x2apic_available = xen_x2apic_para_available,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
#include <linux/acpi.h>
#include <xen/hvc-console.h>
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/start_info.h>
/*
* PVH variables.
*
* xen_pvh and pvh_bootparams need to live in data segment since they
* are used after startup_{32|64}, which clear .bss, are invoked.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data")));
struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
static void xen_pvh_arch_setup(void)
{
/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
if (nr_ioapics == 0)
acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
}
static void __init init_pvh_bootparams(void)
{
struct xen_memory_map memmap;
int rc;
memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
if (rc) {
xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
BUG();
}
pvh_bootparams.e820_entries = memmap.nr_entries;
if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
ISA_START_ADDRESS;
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size =
ISA_END_ADDRESS - ISA_START_ADDRESS;
pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
E820_TYPE_RESERVED;
pvh_bootparams.e820_entries++;
} else
xen_raw_printk("Warning: Can fit ISA range into e820\n");
pvh_bootparams.hdr.cmd_line_ptr =
pvh_start_info.cmdline_paddr;
/* The first module is always ramdisk. */
if (pvh_start_info.nr_modules) {
struct hvm_modlist_entry *modaddr =
__va(pvh_start_info.modlist_paddr);
pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
pvh_bootparams.hdr.ramdisk_size = modaddr->size;
}
/*
* See Documentation/x86/boot.txt.
*
* Version 2.12 supports Xen entry point but we will use default x86/PC
* environment (i.e. hardware_subarch 0).
*/
pvh_bootparams.hdr.version = 0x212;
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
}
/*
* This routine (and those that it might call) should not use
* anything that lives in .bss since that segment will be cleared later.
*/
void __init xen_prepare_pvh(void)
{
u32 msr;
u64 pfn;
if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
pvh_start_info.magic);
BUG();
}
xen_pvh = 1;
msr = cpuid_ebx(xen_cpuid_base() + 2);
pfn = __pa(hypercall_page);
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
init_pvh_bootparams();
x86_init.oem.arch_setup = xen_pvh_arch_setup;
}
#include <linux/types.h>
#include <linux/crash_dump.h>
#include <xen/interface/xen.h>
#include <xen/hvm.h>
#include "mmu.h"
#ifdef CONFIG_PROC_VMCORE
/*
* This function is used in two contexts:
* - the kdump kernel has to check whether a pfn of the crashed kernel
* was a ballooned page. vmcore is using this function to decide
* whether to access a pfn of the crashed kernel.
* - the kexec kernel has to check whether a pfn was ballooned by the
* previous kernel. If the pfn is ballooned, handle it properly.
* Returns 0 if the pfn is not backed by a RAM page, the caller may
* handle the pfn special in this case.
*/
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
{
struct xen_hvm_get_mem_type a = {
.domid = DOMID_SELF,
.pfn = pfn,
};
int ram;
if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
return -ENXIO;
switch (a.mem_type) {
case HVMMEM_mmio_dm:
ram = 0;
break;
case HVMMEM_ram_rw:
case HVMMEM_ram_ro:
default:
ram = 1;
break;
}
return ram;
}
#endif
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
struct xen_hvm_pagetable_dying a;
int rc;
a.domid = DOMID_SELF;
a.gpa = __pa(mm->pgd);
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
WARN_ON_ONCE(rc < 0);
}
static int is_pagetable_dying_supported(void)
{
struct xen_hvm_pagetable_dying a;
int rc = 0;
a.domid = DOMID_SELF;
a.gpa = 0x00;
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
if (rc < 0) {
printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
return 0;
}
return 1;
}
void __init xen_hvm_init_mmu_ops(void)
{
if (is_pagetable_dying_supported())
pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
#endif
}
@@ -4,8 +4,13 @@
 #include <xen/interface/xenpmu.h>
 
 irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+#ifdef CONFIG_XEN_HAVE_VPMU
 void xen_pmu_init(int cpu);
 void xen_pmu_finish(int cpu);
+#else
+static inline void xen_pmu_init(int cpu) {}
+static inline void xen_pmu_finish(int cpu) {}
+#endif
 bool is_xen_pmu(int cpu);
 bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
 bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
...
@@ -11,7 +11,17 @@ extern void xen_send_IPI_self(int vector);
 extern int xen_smp_intr_init(unsigned int cpu);
 extern void xen_smp_intr_free(unsigned int cpu);
+int xen_smp_intr_init_pv(unsigned int cpu);
+void xen_smp_intr_free_pv(unsigned int cpu);
 
+void xen_smp_send_reschedule(int cpu);
+void xen_smp_send_call_function_ipi(const struct cpumask *mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
+
+struct xen_common_irq {
+	int irq;
+	char *name;
+};
 #else /* CONFIG_SMP */
 
 static inline int xen_smp_intr_init(unsigned int cpu)
@@ -19,6 +29,12 @@ static inline int xen_smp_intr_init(unsigned int cpu)
 	return 0;
 }
 static inline void xen_smp_intr_free(unsigned int cpu) {}
+
+static inline int xen_smp_intr_init_pv(unsigned int cpu)
+{
+	return 0;
+}
+static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
 #endif /* CONFIG_SMP */
 
 #endif
#include <linux/types.h>
#include <xen/xen.h>
#include <xen/features.h>
#include <xen/interface/features.h>
#include "xen-ops.h"
void xen_hvm_post_suspend(int suspend_cancelled)
{
int cpu;
if (!suspend_cancelled)
xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
for_each_online_cpu(cpu) {
xen_setup_runstate_info(cpu);
}
}
}
@@ -436,6 +436,14 @@ static void xen_hvm_setup_cpu_clockevents(void)
 
 void __init xen_hvm_init_time_ops(void)
 {
+	/*
+	 * vector callback is needed otherwise we cannot receive interrupts
+	 * on cpu > 0 and at this point we don't know how many cpus are
+	 * available.
+	 */
+	if (!xen_have_vector_callback)
+		return;
+
 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
 		printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
 		       "disable pv timer\n");
...
@@ -434,7 +434,7 @@ static int map_data_for_request(struct vscsifrnt_info *info,
 
 	if (seg_grants) {
 		page = virt_to_page(seg);
-		off = (unsigned long)seg & ~PAGE_MASK;
+		off = offset_in_page(seg);
 		len = sizeof(struct scsiif_request_segment) * data_grants;
 		while (len > 0) {
 			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
...
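The replacement is purely cosmetic: offset_in_page() from <linux/mm.h> expands to exactly the expression it replaces, so behavior is unchanged:

#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)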