Commit 84621c9b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull Xen updates from Konrad Rzeszutek Wilk:
 "Two major features that Xen community is excited about:

  The first is event channel scalability by David Vrabel - we switch
  over from a two-level per-cpu bitmap of events (IRQs) - to a FIFO
  queue with priorities.  This lets us handle more events,
  have lower latency, and better scalability.  Good stuff.

  The other is PVH by Mukesh Rathor.  In short, PV is a mode where the
  kernel lets the hypervisor program page-tables, segments, etc.  With
  EPT/NPT capabilities in current processors, the overhead of doing this
  in an HVM (Hardware Virtual Machine) container is much lower than the
  hypervisor doing it for us.

  In short we let a PV guest run without doing page-table, segment,
  syscall, etc updates through the hypervisor - instead it is all done
  within the guest container.  It is a "hybrid" PV - hence the 'PVH'
  name - a PV guest within an HVM container.

  The major benefits are less code to deal with - for example we only
  use one function from the pv_mmu_ops (which has 39 function
  calls); faster performance for syscall (no context switches into the
  hypervisor); fewer traps on various operations; etc.

  It is still being baked - the ABI is not yet set in stone.  But it is
  pretty awesome and we are excited about it.

  Lastly, there are some changes to ARM code - you should get a simple
  conflict which has been resolved in #linux-next.

  In short, this pull has awesome features.

  Features:
   - FIFO event channels.  Key advantages: support for over 100,000
     events (2^17), 16 different event priorities, improved fairness in
     event latency through the use of FIFOs.
   - Xen PVH support.  "It’s a fully PV kernel mode, running with
     paravirtualized disk and network, paravirtualized interrupts and
     timers, no emulated devices of any kind (and thus no qemu), no BIOS
     or legacy boot — but instead of requiring PV MMU, it uses the HVM
     hardware extensions to virtualize the pagetables, as well as system
     calls and other privileged operations." (from "The
     Paravirtualization Spectrum, Part 2: From poles to a spectrum")

  Bug-fixes:
   - Fixes in balloon driver (refactor and make it work under ARM)
   - Allow xenfb to be used in HVM guests.
   - Allow xen_platform_pci=0 to work properly.
   - Refactors in event channels"

* tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (52 commits)
  xen/pvh: Set X86_CR0_WP and others in CR0 (v2)
  MAINTAINERS: add git repository for Xen
  xen/pvh: Use 'depend' instead of 'select'.
  xen: delete new instances of __cpuinit usage
  xen/fb: allow xenfb initialization for hvm guests
  xen/evtchn_fifo: fix error return code in evtchn_fifo_setup()
  xen-platform: fix error return code in platform_pci_init()
  xen/pvh: remove duplicated include from enlighten.c
  xen/pvh: Fix compile issues with xen_pvh_domain()
  xen: Use dev_is_pci() to check whether it is pci device
  xen/grant-table: Force to use v1 of grants.
  xen/pvh: Support ParaVirtualized Hardware extensions (v3).
  xen/pvh: Piggyback on PVHVM XenBus.
  xen/pvh: Piggyback on PVHVM for grant driver (v4)
  xen/grant: Implement an grant frame array struct (v3).
  xen/grant-table: Refactor gnttab_init
  xen/grants: Remove gnttab_max_grant_frames dependency on gnttab_init.
  xen/pvh: Piggyback on PVHVM for event channels (v2)
  xen/pvh: Update E820 to work with PVH (v2)
  xen/pvh: Secondary VCPU bringup (non-bootup CPUs)
  ...
parents 7ebd3faa c9f6e997
......@@ -9559,6 +9559,7 @@ M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
M: Boris Ostrovsky <boris.ostrovsky@oracle.com>
M: David Vrabel <david.vrabel@citrix.com>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
S: Supported
F: arch/x86/xen/
F: drivers/*/xen-*front.c
......
......@@ -117,6 +117,7 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
return __set_phys_to_machine(pfn, mfn);
}
#define xen_remap(cookie, size) ioremap_cache((cookie), (size));
#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
#define xen_unmap(cookie) iounmap((cookie))
#endif /* _ASM_ARM_XEN_PAGE_H */
......@@ -208,6 +208,7 @@ static int __init xen_guest_init(void)
const char *version = NULL;
const char *xen_prefix = "xen,xen-";
struct resource res;
unsigned long grant_frames;
node = of_find_compatible_node(NULL, NULL, "xen,xen");
if (!node) {
......@@ -224,10 +225,10 @@ static int __init xen_guest_init(void)
}
if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
return 0;
xen_hvm_resume_frames = res.start;
grant_frames = res.start;
xen_events_irq = irq_of_parse_and_map(node, 0);
pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n",
version, xen_events_irq, (xen_hvm_resume_frames >> PAGE_SHIFT));
version, xen_events_irq, (grant_frames >> PAGE_SHIFT));
xen_domain_type = XEN_HVM_DOMAIN;
xen_setup_features();
......@@ -265,6 +266,10 @@ static int __init xen_guest_init(void)
if (xen_vcpu_info == NULL)
return -ENOMEM;
if (gnttab_setup_auto_xlat_frames(grant_frames)) {
free_percpu(xen_vcpu_info);
return -ENOMEM;
}
gnttab_init();
if (!xen_initial_domain())
xenbus_probe(NULL);
......
......@@ -167,7 +167,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
*/
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn = mfn_to_pfn(mfn);
unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
pfn = mfn_to_pfn(mfn);
if (get_phys_to_machine(pfn) != mfn)
return -1; /* force !pfn_valid() */
return pfn;
......@@ -222,5 +227,6 @@ void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
/*
 * Map a region through ioremap(). The expansion deliberately has no trailing
 * semicolon so the macro is a plain expression: it can be used as a function
 * argument, on the right-hand side of an assignment inside a larger
 * expression, or as the body of an unbraced if/else.
 */
#define xen_remap(cookie, size) ioremap((cookie), (size))
#define xen_unmap(cookie) iounmap((cookie))
#endif /* _ASM_X86_XEN_PAGE_H */
......@@ -51,3 +51,7 @@ config XEN_DEBUG_FS
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
config XEN_PVH
bool "Support for running as a PVH guest"
depends on X86_64 && XEN && XEN_PVHVM
def_bool n
......@@ -262,8 +262,9 @@ static void __init xen_banner(void)
struct xen_extraversion extra;
HYPERVISOR_xen_version(XENVER_extraversion, &extra);
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
pv_info.name);
pr_info("Booting paravirtualized kernel %son %s\n",
xen_feature(XENFEAT_auto_translated_physmap) ?
"with PVH extensions " : "", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
......@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void)
ax = 1;
cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
cpuid(1, &ax, &bx, &cx, &dx);
xsave_mask =
(1 << (X86_FEATURE_XSAVE % 32)) |
......@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void)
xen_vcpu_setup(cpu);
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
if (have_vcpu_info_placement) {
* percpu area for all cpus, so make use of it. Note that for
* PVH we want to use native IRQ mechanism. */
if (have_vcpu_info_placement && !xen_pvh_domain()) {
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
......@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void)
* Set up the GDT and segment registers for -fstack-protector. Until
* we do this, we have to be careful not to call any stack-protected
* function, which is most of the kernel.
*
* Note, that it is __ref because the only caller of this after init
* is PVH which is not going to use xen_load_gdt_boot or other
* __init functions.
*/
static void __init xen_setup_stackprotector(void)
static void __ref xen_setup_gdt(int cpu)
{
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_X86_64
unsigned long dummy;
load_percpu_segment(cpu); /* We need to access per-cpu area */
switch_to_new_gdt(cpu); /* GDT and GS set */
/* We are switching from the Xen provided GDT to our HVM mode
 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
 * and we are jumping to reload it.
 */
asm volatile ("pushq %0\n"
"leaq 1f(%%rip),%0\n"
"pushq %0\n"
"lretq\n"
"1:\n"
: "=&r" (dummy) : "0" (__KERNEL_CS));
/*
* While not needed, we also set the %es, %ds, and %fs
* to zero. We don't care about %ss as it is NULL.
* Strictly speaking this is not needed as Xen zeros those
* out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
*
* Linux zeros them in cpu_init() and in secondary_startup_64
* (for BSP).
*/
loadsegment(es, 0);
loadsegment(ds, 0);
loadsegment(fs, 0);
#else
/* PVH: TODO Implement. */
BUG();
#endif
return; /* PVH does not need any PV GDT ops. */
}
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;
......@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
/*
 * A PV guest starts with default flags that are not set for PVH, so set
 * them here as early as possible.
 *
 * @cpu: unused by this function.
 */
static void xen_pvh_set_cr_flags(int cpu)
{
	unsigned long cr0 = read_cr0();

	/*
	 * Some of these are set up in 'secondary_startup_64'. The others:
	 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
	 * (with which PVH shares codepaths), while X86_CR0_PG is for PVH.
	 */
	cr0 |= X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM;
	write_cr0(cr0);
}
/*
 * Early setup for a PVH secondary VCPU: calls xen_setup_gdt() and then
 * xen_pvh_set_cr_flags() for the given cpu.
 *
 * Note that it is __ref because the only caller of this after init
 * is PVH, which is not going to use xen_load_gdt_boot or other
 * __init functions.
 */
void __ref xen_pvh_secondary_vcpu_init(int cpu)
{
xen_setup_gdt(cpu);
xen_pvh_set_cr_flags(cpu);
}
/*
 * Early boot hook for PVH guests. Does nothing unless the hypervisor
 * advertises both XENFEAT_auto_translated_physmap and
 * XENFEAT_hvm_callback_vector; otherwise it records that the vector
 * callback is available and applies the PVH CR0 flags on CPU 0.
 */
static void __init xen_pvh_early_guest_init(void)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap) ||
	    !xen_feature(XENFEAT_hvm_callback_vector))
		return;

	xen_have_vector_callback = 1;
	xen_pvh_set_cr_flags(0);

#ifdef CONFIG_X86_32
	BUG(); /* PVH: Implement proper support. */
#endif
}
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
......@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features();
xen_pvh_early_guest_init();
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
if (!xen_pvh_domain())
pv_cpu_ops = xen_cpu_ops;
x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
......@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void)
/* Work out if we support NX */
x86_configure_nx();
xen_setup_features();
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
xen_build_dynamic_phys_to_machine();
/*
* Set up kernel GDT and segment registers, mainly so that
* -fstack-protector code can be executed.
*/
xen_setup_stackprotector();
xen_setup_gdt(0);
xen_init_irq_ops();
xen_init_cpuid_mask();
......@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void)
/* set the limit of our address space */
xen_reserve_top();
/* We used to do this in xen_arch_setup, but that is too late on AMD
* were early_cpu_init (run before ->arch_setup()) calls early_amd_init
* which pokes 0xcf8 port.
*/
set_iopl.iopl = 1;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
/* PVH: runs at default kernel iopl of 0 */
if (!xen_pvh_domain()) {
/*
 * We used to do this in xen_arch_setup, but that is too late
 * on AMD where early_cpu_init (run before ->arch_setup()) calls
 * early_amd_init which pokes 0xcf8 port.
 */
*/
set_iopl.iopl = 1;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
}
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
......
......@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
}
#ifdef CONFIG_XEN_PVH
#include <xen/balloon.h>
#include <xen/events.h>
#include <xen/xen.h>
#include <linux/slab.h>
/*
 * Back the grant table of an auto-translated guest with ballooned pages.
 *
 * Allocates gnttab_max_grant_frames() ballooned pages, collects their PFNs,
 * maps them with arch_gnttab_map_shared() and publishes the PFN array and
 * frame count in xen_auto_xlat_grant_frames.
 *
 * Returns 0 on success, -ENOMEM if a bookkeeping array cannot be allocated,
 * or the error returned by alloc_xenballooned_pages() or
 * arch_gnttab_map_shared().
 *
 * On success the pfns array stays referenced by xen_auto_xlat_grant_frames;
 * the temporary pages array is freed on every path.
 */
static int __init xlated_setup_gnttab_pages(void)
{
	struct page **pages;
	xen_pfn_t *pfns;
	int rc;
	unsigned int i;
	unsigned long nr_grant_frames = gnttab_max_grant_frames();

	BUG_ON(nr_grant_frames == 0);

	pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
	if (!pfns) {
		rc = -ENOMEM;
		goto out_free_pages;
	}

	rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
	if (rc) {
		pr_warn("%s Couldn't balloon alloc %lu pfns rc:%d\n", __func__,
			nr_grant_frames, rc);
		goto out_free_pfns;
	}

	for (i = 0; i < nr_grant_frames; i++)
		pfns[i] = page_to_pfn(pages[i]);

	rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
				    &xen_auto_xlat_grant_frames.vaddr);
	if (rc) {
		pr_warn("%s Couldn't map %lu pfns rc:%d\n", __func__,
			nr_grant_frames, rc);
		/*
		 * The pages array must still be alive here: it is handed to
		 * the balloon driver to return the pages, and only freed
		 * afterwards.
		 */
		free_xenballooned_pages(nr_grant_frames, pages);
		goto out_free_pfns;
	}

	xen_auto_xlat_grant_frames.pfn = pfns;
	xen_auto_xlat_grant_frames.count = nr_grant_frames;

	/* Only the PFNs are needed from here on. */
	kfree(pages);
	return 0;

out_free_pfns:
	kfree(pfns);
out_free_pages:
	kfree(pages);
	return rc;
}
/*
 * Initcall entry point: sets up the auto-translated grant frames when
 * running as a PVH domain, and reports -ENODEV for every other domain type.
 */
static int __init xen_pvh_gnttab_setup(void)
{
	return xen_pvh_domain() ? xlated_setup_gnttab_pages() : -ENODEV;
}
/* Call it _before_ __gnttab_init as we need to initialize the
* xen_auto_xlat_grant_frames first. */
core_initcall(xen_pvh_gnttab_setup);
#endif
......@@ -5,6 +5,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/hypercall.h>
......@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void)
{
pv_irq_ops = xen_irq_ops;
/* For PVH we use default pv_irq_ops settings. */
if (!xen_feature(XENFEAT_hvm_callback_vector))
pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
......@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
* instead of somewhere later and be confusing. */
xen_mc_flush();
}
#endif
static void __init xen_pagetable_init(void)
static void __init xen_pagetable_p2m_copy(void)
{
#ifdef CONFIG_X86_64
unsigned long size;
unsigned long addr;
#endif
paging_init();
xen_setup_shared_info();
#ifdef CONFIG_X86_64
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long new_mfn_list;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
/* On 32-bit, we get zero so this never gets executed. */
new_mfn_list = xen_revector_p2m_tree();
if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
/* using __ka address and sticking INVALID_P2M_ENTRY! */
memset((void *)xen_start_info->mfn_list, 0xff, size);
/* We should be in __ka space. */
BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
addr = xen_start_info->mfn_list;
/* We roundup to the PMD, which means that if anybody at this stage is
* using the __ka address of xen_start_info or xen_start_info->shared_info
* they are in going to crash. Fortunatly we have already revectored
* in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
size = roundup(size, PMD_SIZE);
xen_cleanhighmap(addr, addr + size);
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size);
/* And revector! Bye bye old array */
xen_start_info->mfn_list = new_mfn_list;
} else
goto skip;
}
unsigned long new_mfn_list;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
new_mfn_list = xen_revector_p2m_tree();
/* No memory or already called. */
if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
return;
/* using __ka address and sticking INVALID_P2M_ENTRY! */
memset((void *)xen_start_info->mfn_list, 0xff, size);
/* We should be in __ka space. */
BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
addr = xen_start_info->mfn_list;
/* We roundup to the PMD, which means that if anybody at this stage is
 * using the __ka address of xen_start_info or xen_start_info->shared_info
 * they are going to crash. Fortunately we have already revectored
 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
size = roundup(size, PMD_SIZE);
xen_cleanhighmap(addr, addr + size);
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size);
/* And revector! Bye bye old array */
xen_start_info->mfn_list = new_mfn_list;
/* At this stage, cleanup_highmap has already cleaned __ka space
* from _brk_limit way up to the max_pfn_mapped (which is the end of
* the ramdisk). We continue on, erasing PMD entries that point to page
......@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
* anything at this stage. */
xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
#endif
skip:
}
#endif
/*
 * Installed as x86_init.paging.pagetable_init by xen_init_mmu_ops().
 * Calls paging_init() and xen_setup_shared_info(), then on 64-bit builds
 * xen_pagetable_p2m_copy(), and finishes with xen_post_allocator_init().
 */
static void __init xen_pagetable_init(void)
{
paging_init();
xen_setup_shared_info();
#ifdef CONFIG_X86_64
/* Only built for 64-bit. */
xen_pagetable_p2m_copy();
#endif
xen_post_allocator_init();
}
......@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
/* For PVH no need to set R/O or R/W to pin them or unpin them. */
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
......@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* but that's enough to get __va working. We need to fill in the rest
* of the physical mapping once some sort of allocator has been set
* up.
* NOTE: for PVH, the page tables are native.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
......@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Zap identity mapping */
init_level4_pgt[0] = __pgd(0);
/* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[272] -> level3_ident_pgt
* L4[511] -> level3_kernel_pgt */
convert_pfn_mfn(init_level4_pgt);
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt);
/* L3_k[510] -> level2_kernel_pgt
* L3_i[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[272] -> level3_ident_pgt
* L4[511] -> level3_kernel_pgt */
convert_pfn_mfn(init_level4_pgt);
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt);
/* L3_k[510] -> level2_kernel_pgt
* L3_i[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
}
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
......@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
copy_page(level2_fixmap_pgt, l2);
/* Note that we don't do anything with level1_fixmap_pgt which
* we don't need. */
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Make pagetable pieces RO */
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
PFN_DOWN(__pa_symbol(init_level4_pgt)));
/* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
/* Make pagetable pieces RO */
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
PFN_DOWN(__pa_symbol(init_level4_pgt)));
/* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
/*
* At this stage there can be no user pgd, and no page
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
xen_mc_batch();
__xen_write_cr3(true, __pa(init_level4_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU);
/*
* At this stage there can be no user pgd, and no page
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
xen_mc_batch();
__xen_write_cr3(true, __pa(init_level4_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU);
} else
native_write_cr3(__pa(init_level4_pgt));
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
......@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
static void __init xen_post_allocator_init(void)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
......@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
x86_init.paging.pagetable_init = xen_pagetable_init;
/* Optimization - we can use the HVM one but it has no idea which
* VCPUs are descheduled - which means that it will needlessly IPI
* them. Xen knows so let it do the job.
*/
if (xen_feature(XENFEAT_auto_translated_physmap)) {
pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
return;
}
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
......
......@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
/* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
......@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void)
void xen_setup_mfn_list_list(void)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
......@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void)
/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
unsigned long *mfn_list;
unsigned long max_pfn;
unsigned long pfn;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
mfn_list = (unsigned long *)xen_start_info->mfn_list;
max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
xen_max_p2m_pfn = max_pfn;
p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
......
......@@ -30,10 +30,9 @@
#define XEN_PLATFORM_ERR_PROTOCOL -2
#define XEN_PLATFORM_ERR_BLACKLIST -3
/* store the value of xen_emul_unplug after the unplug is done */
int xen_platform_pci_unplug;
EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
#ifdef CONFIG_XEN_PVHVM
/* store the value of xen_emul_unplug after the unplug is done */
static int xen_platform_pci_unplug;
static int xen_emul_unplug;
static int check_platform_magic(void)
......@@ -69,6 +68,80 @@ static int check_platform_magic(void)
return 0;
}
/*
 * Report whether paravirtualized devices are available to this guest.
 *
 * Returns false when not running on Xen, true for PV domains, and otherwise
 * decides from the xen_platform_pci_unplug flags. A false result here is not
 * final for HVM guests: callers follow up with xen_has_pv_nic_devices() or
 * xen_has_pv_disk_devices() to be certain which driver can load.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype, matching the sibling xen_has_pv_*_devices() functions.
 */
bool xen_has_pv_devices(void)
{
	if (!xen_domain())
		return false;

	/* PV domains always have them. */
	if (xen_pv_domain())
		return true;

	/* And user has xen_platform_pci=0 set in guest config as
	 * driver did not modify the value. */
	if (xen_platform_pci_unplug == 0)
		return false;

	if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER)
		return false;

	if (xen_platform_pci_unplug & XEN_UNPLUG_ALL)
		return true;

	/* This is an odd one - we are going to run legacy
	 * and PV drivers at the same time. */
	if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
		return true;

	/* And the caller has to follow with xen_pv_{disk,nic}_devices
	 * to be certain which driver can load. */
	return false;
}
EXPORT_SYMBOL_GPL(xen_has_pv_devices);
/*
 * Helper for the per-class checks: in an HVM domain, any of the unplug bits
 * in @state being set means the PV device is present; otherwise defer to the
 * generic xen_has_pv_devices() answer.
 */
static bool __xen_has_pv_device(int state)
{
	/* HVM domains might or might not */
	bool unplugged = xen_hvm_domain() && (xen_platform_pci_unplug & state);

	return unplugged || xen_has_pv_devices();
}
/* True when PV network devices can be used by this guest. */
bool xen_has_pv_nic_devices(void)
{
	const int nic_mask = XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL;

	return __xen_has_pv_device(nic_mask);
}
EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices);
/* True when PV disk devices can be used by this guest. */
bool xen_has_pv_disk_devices(void)
{
	const int disk_mask = XEN_UNPLUG_ALL_IDE_DISKS |
			      XEN_UNPLUG_AUX_IDE_DISKS |
			      XEN_UNPLUG_ALL;

	return __xen_has_pv_device(disk_mask);
}
EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices);
/*
 * Determines whether PV _and_ legacy (IDE) drivers are meant to run
 * together. This combination is only possible under HVM, so the answer is
 * false outside Xen and for PV domains; otherwise it is true exactly when
 * XEN_UNPLUG_UNNECESSARY is set in xen_platform_pci_unplug.
 */
bool xen_has_pv_and_legacy_disk_devices(void)
{
	/* N.B. This is only ever used in HVM mode */
	if (!xen_domain() || xen_pv_domain())
		return false;

	return (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) != 0;
}
EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices);
void xen_unplug_emulated_devices(void)
{
int r;
......
......@@ -27,6 +27,7 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "mmu.h"
#include "xen-ops.h"
#include "vdso.h"
......@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
memblock_reserve(start, size);
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
......@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
.domid = DOMID_SELF
};
unsigned long len = 0;
int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
unsigned long pfn;
int ret;
......@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
continue;
frame = mfn;
} else {
if (mfn != INVALID_P2M_ENTRY)
if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
......@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start,
/*
 * Release the frames in [start, end) and return how many were released.
 * Non-auto-translated guests do the real work through xen_do_chunk(); for
 * auto-translated guests the whole range is reported as released without
 * calling it.
 */
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		return xen_do_chunk(start, end, true);

	/*
	 * Xen already ballooned out the E820 non RAM regions for us
	 * and set them up properly in EPT.
	 */
	return end - start;
}
......@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk(
* (except for the ISA region which must be 1:1 mapped) to
* release the refcounts (in Xen) on the original frames.
*/
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
/*
* PVH E820 matches the hypervisor's P2M which means we need to
* account for the proper values of *release and *identity.
*/
for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
pte_t pte = __pte_ma(0);
if (pfn < PFN_UP(ISA_END_ADDRESS))
......@@ -563,16 +581,13 @@ void xen_enable_nmi(void)
BUG();
#endif
}
void __init xen_arch_setup(void)
void __init xen_pvmmu_arch_setup(void)
{
xen_panic_handler_init();
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
if (!xen_feature(XENFEAT_auto_translated_physmap))
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_pae_extended_cr3);
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
......@@ -581,6 +596,15 @@ void __init xen_arch_setup(void)
xen_enable_sysenter();
xen_enable_syscall();
xen_enable_nmi();
}
/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
xen_panic_handler_init();
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
......
......@@ -73,9 +73,11 @@ static void cpu_bringup(void)
touch_softlockup_watchdog();
preempt_disable();
xen_enable_sysenter();
xen_enable_syscall();
/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
xen_enable_sysenter();
xen_enable_syscall();
}
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
......@@ -97,8 +99,14 @@ static void cpu_bringup(void)
wmb(); /* make sure everything is out */
}
static void cpu_bringup_and_idle(void)
/* Note: cpu parameter is only relevant for PVH */
static void cpu_bringup_and_idle(int cpu)
{
#ifdef CONFIG_X86_64
if (xen_feature(XENFEAT_auto_translated_physmap) &&
xen_feature(XENFEAT_supervisor_mode_kernel))
xen_pvh_secondary_vcpu_init(cpu);
#endif
cpu_bringup();
cpu_startup_entry(CPUHP_ONLINE);
}
......@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void)
native_smp_prepare_boot_cpu();
if (xen_pv_domain()) {
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
make_lowmem_page_readwrite(xen_initial_gdt);
if (!xen_feature(XENFEAT_writable_page_tables))
/* We've switched to the "real" per-cpu gdt, so make
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
#ifdef CONFIG_X86_32
/*
......@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
/* Note: PVH is not yet supported on x86_32. */
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
xen_copy_trap_info(ctxt->trap_ctxt);
......@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
#ifdef CONFIG_X86_32
}
ctxt->user_regs.cs = __KERNEL_CS;
#else
} else
/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
 * %rdi having the cpu number - which means we are passing in
 * the cpu as the first parameter. Subtle!
 */
ctxt->user_regs.rdi = cpu;
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
BUG();
......
......@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu)
IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
IRQF_FORCE_RESUME,
name, NULL);
(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
memcpy(evt, xen_clockevent, sizeof(*evt));
......
......@@ -11,8 +11,28 @@
#include <asm/page_types.h>
#include <xen/interface/elfnote.h>
#include <xen/interface/features.h>
#include <asm/xen/interface.h>
#ifdef CONFIG_XEN_PVH
#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
* balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
* XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
*/
#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
(1 << XENFEAT_auto_translated_physmap) | \
(1 << XENFEAT_supervisor_mode_kernel) | \
(1 << XENFEAT_hvm_callback_vector))
/* The XENFEAT_writable_page_tables is not strictly necessary as we set that
* up regardless whether this CONFIG option is enabled or not, but it
* clarifies what the right flags need to be.
*/
#else
#define PVH_FEATURES_STR ""
#define PVH_FEATURES (0)
#endif
__INIT
ENTRY(startup_xen)
cld
......@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
(1 << XENFEAT_writable_page_tables) |
(1 << XENFEAT_dom0))
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
......
......@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void);
void xen_pvh_secondary_vcpu_init(int cpu);
#endif /* XEN_OPS_H */
......@@ -1356,7 +1356,7 @@ static int blkfront_probe(struct xenbus_device *dev,
char *type;
int len;
/* no unplug has been done: do not hook devices != xen vbds */
if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
if (xen_has_pv_and_legacy_disk_devices()) {
int major;
if (!VDEV_IS_EXTENDED(vdevice))
......@@ -2079,7 +2079,7 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
if (xen_hvm_domain() && !xen_platform_pci_unplug)
if (!xen_has_pv_disk_devices())
return -ENODEV;
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
......
......@@ -17,6 +17,7 @@
#include <xen/xenbus.h>
#include <xen/page.h>
#include "tpm.h"
#include <xen/platform_pci.h>
struct tpm_private {
struct tpm_chip *chip;
......@@ -378,6 +379,9 @@ static int __init xen_tpmfront_init(void)
if (!xen_domain())
return -ENODEV;
if (!xen_has_pv_devices())
return -ENODEV;
return xenbus_register_frontend(&tpmfront_driver);
}
module_init(xen_tpmfront_init);
......
......@@ -29,6 +29,7 @@
#include <xen/interface/io/fbif.h>
#include <xen/interface/io/kbdif.h>
#include <xen/xenbus.h>
#include <xen/platform_pci.h>
struct xenkbd_info {
struct input_dev *kbd;
......@@ -380,6 +381,9 @@ static int __init xenkbd_init(void)
if (xen_initial_domain())
return -ENODEV;
if (!xen_has_pv_devices())
return -ENODEV;
return xenbus_register_frontend(&xenkbd_driver);
}
......
......@@ -2115,7 +2115,7 @@ static int __init netif_init(void)
if (!xen_domain())
return -ENODEV;
if (xen_hvm_domain() && !xen_platform_pci_unplug)
if (!xen_has_pv_nic_devices())
return -ENODEV;
pr_info("Initialising Xen virtual ethernet driver\n");
......
......@@ -20,6 +20,7 @@
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
#include <xen/platform_pci.h>
#include <asm/xen/swiotlb-xen.h>
#define INVALID_GRANT_REF (0)
......@@ -1146,6 +1147,9 @@ static int __init pcifront_init(void)
if (!xen_pv_domain() || xen_initial_domain())
return -ENODEV;
if (!xen_has_pv_devices())
return -ENODEV;
pci_frontend_registrar(1 /* enable */);
return xenbus_register_frontend(&xenpci_driver);
......
......@@ -35,6 +35,7 @@
#include <xen/interface/io/fbif.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus.h>
#include <xen/platform_pci.h>
struct xenfb_info {
unsigned char *fb;
......@@ -692,13 +693,16 @@ static DEFINE_XENBUS_DRIVER(xenfb, ,
static int __init xenfb_init(void)
{
if (!xen_pv_domain())
if (!xen_domain())
return -ENODEV;
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
if (!xen_has_pv_devices())
return -ENODEV;
return xenbus_register_frontend(&xenfb_driver);
}
......
......@@ -3,7 +3,6 @@ menu "Xen driver support"
config XEN_BALLOON
bool "Xen memory balloon driver"
depends on !ARM
default y
help
The balloon driver allows the Xen domain to request more memory from
......
......@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
obj-y += grant-table.o features.o events.o balloon.o manage.o
obj-y += grant-table.o features.o balloon.o manage.o
obj-y += events/
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
......
......@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
return page;
}
static struct page *balloon_first_page(void)
{
if (list_empty(&ballooned_pages))
return NULL;
return list_entry(ballooned_pages.next, struct page, lru);
}
static struct page *balloon_next_page(struct page *page)
{
struct list_head *next = page->lru.next;
......@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
page = balloon_first_page();
page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
for (i = 0; i < nr_pages; i++) {
if (!page) {
nr_pages = i;
......
......@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
dbgp.op = op;
#ifdef CONFIG_PCI
if (ctrlr->bus == &pci_bus_type) {
if (dev_is_pci(ctrlr)) {
const struct pci_dev *pdev = to_pci_dev(ctrlr);
dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
......
obj-y += events.o
events-y += events_base.o
events-y += events_2l.o
events-y += events_fifo.o
This diff is collapsed.
This diff is collapsed.
/*
* Xen Event Channels (internal header)
*
* Copyright (C) 2013 Citrix Systems R&D Ltd.
*
* This source code is licensed under the GNU General Public License,
* Version 2 or later. See the file COPYING for more details.
*/
#ifndef __EVENTS_INTERNAL_H__
#define __EVENTS_INTERNAL_H__
/*
 * Interrupt types.
 *
 * The numeric values are spelled out so that IRQT_UNBOUND being zero
 * (and therefore what zero-initialised storage reads as) is explicit.
 */
enum xen_irq_type {
	IRQT_UNBOUND = 0,
	IRQT_PIRQ    = 1,
	IRQT_VIRQ    = 2,
	IRQT_IPI     = 3,
	IRQT_EVTCHN  = 4
};
/*
 * Packed IRQ information:
 * type - enum xen_irq_type
 * event channel - irq->event channel mapping
 * cpu - cpu this event channel is bound to
 * index - type-specific information (held in the union u below):
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN -
 *
 * NOTE(review): PIRQ_NEEDS_EOI is defined as bit 0, not an MSB; presumably
 * it is stored in u.pirq.flags rather than in the vector - confirm.
 */
struct irq_info {
/* List linkage; the list this entry is placed on is not visible here. */
struct list_head list;
/* Reference count; presumably for extra users of the binding - confirm. */
int refcnt;
enum xen_irq_type type; /* type */
/* IRQ number this entry describes. */
unsigned irq;
unsigned int evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
/* Type-specific data; which member is meaningful depends on type. */
union {
unsigned short virq;
enum ipi_vector ipi;
struct {
unsigned short pirq;
unsigned short gsi;
unsigned char vector;
/* Presumably a mask of the PIRQ_* bits - confirm. */
unsigned char flags;
uint16_t domid;
} pirq;
} u;
};
#define PIRQ_NEEDS_EOI (1 << 0)
#define PIRQ_SHAREABLE (1 << 1)
/*
 * Hooks implemented by an event channel ABI backend.
 *
 * The inline wrappers in this header dispatch through the global
 * evtchn_ops pointer.  ->setup and ->resume are optional: their wrappers
 * test the pointer for NULL first.  Every other hook that has a wrapper
 * here is called without a NULL check.
 */
struct evtchn_ops {
	/* Channel counts. */
	unsigned int (*max_channels)(void);
	unsigned int (*nr_channels)(void);

	/* Per-binding setup (optional) and CPU binding. */
	int (*setup)(struct irq_info *info);
	void (*bind_to_cpu)(struct irq_info *info, unsigned int cpu);

	/* Pending state of a port. */
	void (*clear_pending)(unsigned int port);
	void (*set_pending)(unsigned int port);
	bool (*is_pending)(unsigned int port);

	/* Mask state of a port. */
	bool (*test_and_set_mask)(unsigned int port);
	void (*mask)(unsigned int port);
	void (*unmask)(unsigned int port);

	/* Event delivery for one CPU. */
	void (*handle_events)(unsigned int cpu);

	/* Optional resume hook. */
	void (*resume)(void);
};
extern const struct evtchn_ops *evtchn_ops;
extern int **evtchn_to_irq;
int get_evtchn_to_irq(unsigned int evtchn);
struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
unsigned cpu_from_evtchn(unsigned int evtchn);
/*
 * Return whatever the ->max_channels() hook of the current evtchn_ops
 * reports.  The hook pointer is not checked for NULL.
 */
static inline unsigned xen_evtchn_max_channels(void)
{
return evtchn_ops->max_channels();
}
/*
 * Do any ABI specific setup for a bound event channel before it can
 * be unmasked and used.
 *
 * The ->setup hook is optional.  Returns the hook's result, or 0 when the
 * current evtchn_ops does not provide one.
 */
static inline int xen_evtchn_port_setup(struct irq_info *info)
{
	if (!evtchn_ops->setup)
		return 0;

	return evtchn_ops->setup(info);
}
/*
 * Forward @info and @cpu to the ->bind_to_cpu() hook of the current
 * evtchn_ops.  The hook pointer is not checked for NULL.
 */
static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
unsigned cpu)
{
evtchn_ops->bind_to_cpu(info, cpu);
}
/*
 * Call the ->clear_pending() hook of the current evtchn_ops for @port.
 * The hook pointer is not checked for NULL.
 */
static inline void clear_evtchn(unsigned port)
{
evtchn_ops->clear_pending(port);
}
/*
 * Call the ->set_pending() hook of the current evtchn_ops for @port.
 * The hook pointer is not checked for NULL.
 */
static inline void set_evtchn(unsigned port)
{
evtchn_ops->set_pending(port);
}
/*
 * Return the result of the ->is_pending() hook of the current evtchn_ops
 * for @port.  The hook pointer is not checked for NULL.
 */
static inline bool test_evtchn(unsigned port)
{
return evtchn_ops->is_pending(port);
}
/*
 * Return the result of the ->test_and_set_mask() hook of the current
 * evtchn_ops for @port.  Presumably the hook reports the previous mask
 * state after setting the mask - confirm against the ABI implementations.
 * The hook pointer is not checked for NULL.
 */
static inline bool test_and_set_mask(unsigned port)
{
return evtchn_ops->test_and_set_mask(port);
}
/*
 * Call the ->mask() hook of the current evtchn_ops for @port.
 *
 * The hook returns void, so it is invoked as a plain statement: ISO C does
 * not allow a return statement with an expression in a void function.
 * The hook pointer is not checked for NULL.
 */
static inline void mask_evtchn(unsigned port)
{
	evtchn_ops->mask(port);
}
/*
 * Call the ->unmask() hook of the current evtchn_ops for @port.
 *
 * The hook returns void, so it is invoked as a plain statement: ISO C does
 * not allow a return statement with an expression in a void function.
 * The hook pointer is not checked for NULL.
 */
static inline void unmask_evtchn(unsigned port)
{
	evtchn_ops->unmask(port);
}
/*
 * Call the ->handle_events() hook of the current evtchn_ops for @cpu.
 *
 * The hook returns void, so it is invoked as a plain statement: ISO C does
 * not allow a return statement with an expression in a void function.
 * The hook pointer is not checked for NULL.
 */
static inline void xen_evtchn_handle_events(unsigned cpu)
{
	evtchn_ops->handle_events(cpu);
}
/*
 * Invoke the ->resume() hook of the current evtchn_ops, if it provides
 * one.  The hook is optional; nothing happens when it is NULL.
 */
static inline void xen_evtchn_resume(void)
{
	void (*resume_hook)(void) = evtchn_ops->resume;

	if (!resume_hook)
		return;

	resume_hook();
}
void xen_evtchn_2l_init(void);
int xen_evtchn_fifo_init(void);
#endif /* #ifndef __EVENTS_INTERNAL_H__ */
......@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
break;
rc = -EINVAL;
if (unbind.port >= NR_EVENT_CHANNELS)
if (unbind.port >= xen_evtchn_nr_channels())
break;
rc = -ENOTCONN;
......
......@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
if (!xen_domain())
return -ENODEV;
use_ptemod = xen_pv_domain();
use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
err = misc_register(&gntdev_miscdev);
if (err != 0) {
......
......@@ -62,12 +62,10 @@
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
struct grant_frames xen_auto_xlat_grant_frames;
static union {
struct grant_entry_v1 *v1;
......@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
unsigned int gnttab_max_grant_frames(void)
{
unsigned int xen_max = __max_nr_grant_frames();
static unsigned int boot_max_nr_grant_frames;
/* First time, initialize it properly. */
if (!boot_max_nr_grant_frames)
boot_max_nr_grant_frames = __max_nr_grant_frames();
if (xen_max > boot_max_nr_grant_frames)
return boot_max_nr_grant_frames;
......@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
/*
 * gnttab_setup_auto_xlat_frames - describe the grant frame region.
 * @addr: address handed to xen_remap() and PFN_DOWN() as the start of the
 *        region.
 *
 * Maps __max_nr_grant_frames() pages starting at @addr with xen_remap(),
 * builds an array holding PFN_DOWN(@addr) + index for each frame, and
 * stores the mapping, the array and the frame count in
 * xen_auto_xlat_grant_frames.
 *
 * Returns 0 on success, -EINVAL when xen_auto_xlat_grant_frames.count is
 * already non-zero, or -ENOMEM when the mapping or the array allocation
 * fails.  On every error path xen_auto_xlat_grant_frames is left
 * unmodified and a mapping made here is undone.
 */
int gnttab_setup_auto_xlat_frames(unsigned long addr)
{
	unsigned int nr_frames = __max_nr_grant_frames();
	unsigned int idx = 0;
	xen_pfn_t *frames;
	void *mapping;

	/* Only one set of frames may be registered at a time. */
	if (xen_auto_xlat_grant_frames.count != 0)
		return -EINVAL;

	mapping = xen_remap(addr, PAGE_SIZE * nr_frames);
	if (!mapping) {
		pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
			addr);
		return -ENOMEM;
	}

	frames = kcalloc(nr_frames, sizeof(frames[0]), GFP_KERNEL);
	if (!frames) {
		/* Do not leave the mapping behind when the array is missing. */
		xen_unmap(mapping);
		return -ENOMEM;
	}

	/* The frames are consecutive, starting at the frame containing @addr. */
	while (idx < nr_frames) {
		frames[idx] = PFN_DOWN(addr) + idx;
		idx++;
	}

	xen_auto_xlat_grant_frames.count = nr_frames;
	xen_auto_xlat_grant_frames.pfn = frames;
	xen_auto_xlat_grant_frames.vaddr = mapping;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
/*
 * gnttab_free_auto_xlat_frames - release what
 * gnttab_setup_auto_xlat_frames() stored.
 *
 * Frees the frame number array, unmaps the region, and resets
 * xen_auto_xlat_grant_frames to its empty state.  Does nothing when the
 * recorded count is zero.
 */
void gnttab_free_auto_xlat_frames(void)
{
	if (xen_auto_xlat_grant_frames.count == 0)
		return;

	kfree(xen_auto_xlat_grant_frames.pfn);
	xen_unmap(xen_auto_xlat_grant_frames.vaddr);

	/* Reset all fields; a zero count is what marks the frames as unset. */
	xen_auto_xlat_grant_frames.vaddr = NULL;
	xen_auto_xlat_grant_frames.count = 0;
	xen_auto_xlat_grant_frames.pfn = NULL;
}
EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
......@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
unsigned int nr_gframes = end_idx + 1;
int rc;
if (xen_hvm_domain()) {
if (xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
......@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
pr_warn("grant table add_to_physmap failed, err=%d\n",
......@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
int rc;
struct gnttab_set_version gsv;
if (xen_hvm_domain())
gsv.version = 1;
else
gsv.version = 2;
gsv.version = 1;
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2) {
grant_table_version = 2;
......@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
if (xen_pv_domain())
return gnttab_map(0, nr_grant_frames - 1);
if (gnttab_shared.addr == NULL) {
gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
PAGE_SIZE * max_nr_gframes);
if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
xen_hvm_resume_frames);
pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
(unsigned long)xen_auto_xlat_grant_frames.vaddr);
return -ENOMEM;
}
}
gnttab_map(0, nr_grant_frames - 1);
return 0;
return gnttab_map(0, nr_grant_frames - 1);
}
int gnttab_resume(void)
......@@ -1227,13 +1267,12 @@ int gnttab_init(void)
gnttab_request_version();
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
BUG_ON(grefs_per_grant_frame == 0);
max_nr_glist_frames = (boot_max_nr_grant_frames *
max_nr_glist_frames = (gnttab_max_grant_frames() *
grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
......@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
return gnttab_init();
}
core_initcall(__gnttab_init);
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
* beforehand to initialize xen_auto_xlat_grant_frames. */
core_initcall_sync(__gnttab_init);
......@@ -26,7 +26,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
#ifdef CONFIG_PCI_MMCONFIG
#include <asm/pci_x86.h>
#endif
static bool __read_mostly pci_seg_supported = true;
......
......@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
long ioaddr;
long mmio_addr, mmio_len;
unsigned int max_nr_gframes;
unsigned long grant_frames;
if (!xen_domain())
return -ENODEV;
......@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
}
max_nr_gframes = gnttab_max_grant_frames();
xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
ret = gnttab_init();
grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
ret = gnttab_setup_auto_xlat_frames(grant_frames);
if (ret)
goto out;
ret = gnttab_init();
if (ret)
goto grant_out;
xenbus_probe(NULL);
return 0;
grant_out:
gnttab_free_auto_xlat_frames();
out:
pci_release_region(pdev, 0);
mem_out:
......
......@@ -45,6 +45,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
#include <xen/features.h>
#include "xenbus_probe.h"
......@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
void __init xenbus_ring_ops_init(void)
{
if (xen_pv_domain())
if (!xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
......
......@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
if (xen_hvm_domain() && !xen_platform_pci_unplug)
if (!xen_has_pv_devices())
return -ENODEV;
ready_to_wait_for_devices = 1;
......
......@@ -7,6 +7,8 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/events.h>
unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(unsigned int evtchn);
int bind_evtchn_to_irqhandler(unsigned int evtchn,
irq_handler_t handler,
......@@ -37,6 +39,11 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
*/
void unbind_from_irqhandler(unsigned int irq, void *dev_id);
#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
int xen_set_irq_priority(unsigned irq, unsigned priority);
/*
* Allow extra references to event channels exposed to userspace by evtchn
*/
......@@ -73,6 +80,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
/* Determine the IRQ which is bound to an event channel */
unsigned irq_from_evtchn(unsigned int evtchn);
int irq_from_virq(unsigned int cpu, unsigned int virq);
unsigned int evtchn_from_irq(unsigned irq);
/* Xen HVM evtchn vector callback */
void xen_hvm_callback_vector(void);
......
......@@ -178,8 +178,15 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
grant_status_t **__shared);
void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
extern unsigned long xen_hvm_resume_frames;
/*
 * Description of the grant frame region filled in by
 * gnttab_setup_auto_xlat_frames() and cleared by
 * gnttab_free_auto_xlat_frames().
 */
struct grant_frames {
/* Array of count frame numbers, one per grant frame. */
xen_pfn_t *pfn;
/* Number of entries in pfn; zero means nothing has been set up. */
unsigned int count;
/* Virtual address at which the frames are mapped. */
void *vaddr;
};
extern struct grant_frames xen_auto_xlat_grant_frames;
unsigned int gnttab_max_grant_frames(void);
int gnttab_setup_auto_xlat_frames(unsigned long addr);
void gnttab_free_auto_xlat_frames(void);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
......
......@@ -140,6 +140,19 @@
*/
#define XEN_ELFNOTE_SUSPEND_CANCEL 14
/*
* The features supported by this kernel (numeric).
*
* Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
* kernel to specify support for features that older hypervisors don't
* know about. The set of features 4.2 and newer hypervisors will
* consider supported by the kernel is the combination of the sets
* specified through this and the string note.
*
* LEGACY: FEATURES
*/
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
/*
......
......@@ -190,6 +190,39 @@ struct evtchn_reset {
};
typedef struct evtchn_reset evtchn_reset_t;
/*
* EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
*/
#define EVTCHNOP_init_control 11
/*
 * Argument block for EVTCHNOP_init_control.
 * NOTE(review): field meanings are not established by this file; presumably
 * control_gfn/offset locate the control block and vcpu selects the VCPU it
 * belongs to - confirm against the Xen public header.
 */
struct evtchn_init_control {
/* IN parameters. */
uint64_t control_gfn;
uint32_t offset;
uint32_t vcpu;
/* OUT parameters. */
uint8_t link_bits;
/* Explicit padding: 8 + 4 + 4 + 1 + 7 bytes gives a 24-byte structure. */
uint8_t _pad[7];
};
/*
* EVTCHNOP_expand_array: add an additional page to the event array.
*/
#define EVTCHNOP_expand_array 12
/*
 * Argument block for EVTCHNOP_expand_array.
 * NOTE(review): array_gfn is presumably the guest frame number of the page
 * being added to the event array - confirm against the Xen public header.
 */
struct evtchn_expand_array {
/* IN parameters. */
uint64_t array_gfn;
};
/*
* EVTCHNOP_set_priority: set the priority for an event channel.
*/
#define EVTCHNOP_set_priority 13
/*
 * Argument block for EVTCHNOP_set_priority.
 * NOTE(review): priority presumably takes one of the EVTCHN_FIFO_PRIORITY_*
 * values defined in this header - confirm.
 */
struct evtchn_set_priority {
/* IN parameters. */
uint32_t port;
uint32_t priority;
};
struct evtchn_op {
uint32_t cmd; /* EVTCHNOP_* */
union {
......@@ -207,4 +240,39 @@ struct evtchn_op {
};
DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
/*
* 2-level ABI
*/
#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
/*
* FIFO ABI
*/
/* Events may have priorities from 0 (highest) to 15 (lowest). */
#define EVTCHN_FIFO_PRIORITY_MAX 0
#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
#define EVTCHN_FIFO_PRIORITY_MIN 15
#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
typedef uint32_t event_word_t;
#define EVTCHN_FIFO_PENDING 31
#define EVTCHN_FIFO_MASKED 30
#define EVTCHN_FIFO_LINKED 29
#define EVTCHN_FIFO_BUSY 28
#define EVTCHN_FIFO_LINK_BITS 17
#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
/*
 * FIFO ABI control block.
 * NOTE(review): per-field semantics are not shown in this file; confirm
 * against the Xen FIFO event channel ABI documentation.
 */
struct evtchn_fifo_control_block {
uint32_t ready;
uint32_t _rsvd;
/* One entry per queue: EVTCHN_FIFO_MAX_QUEUES is EVTCHN_FIFO_PRIORITY_MIN + 1. */
event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
};
#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
......@@ -281,12 +281,6 @@ struct multicall_entry {
};
DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
/*
* Event channel endpoints per domain:
* 1024 if a long is 32 bits; 4096 if a long is 64 bits.
*/
#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
struct vcpu_time_info {
/*
* Updates to the following values are preceded and followed
......
......@@ -46,6 +46,27 @@ static inline int xen_must_unplug_disks(void) {
#endif
}
extern int xen_platform_pci_unplug;
#if defined(CONFIG_XEN_PVHVM)
extern bool xen_has_pv_devices(void);
extern bool xen_has_pv_disk_devices(void);
extern bool xen_has_pv_nic_devices(void);
extern bool xen_has_pv_and_legacy_disk_devices(void);
#else
/*
 * Fallback used when CONFIG_XEN_PVHVM is not defined: the result depends
 * only on whether CONFIG_XEN is enabled in this build.
 */
static inline bool xen_has_pv_devices(void)
{
return IS_ENABLED(CONFIG_XEN);
}
/*
 * Fallback used when CONFIG_XEN_PVHVM is not defined: the result depends
 * only on whether CONFIG_XEN is enabled in this build.
 */
static inline bool xen_has_pv_disk_devices(void)
{
return IS_ENABLED(CONFIG_XEN);
}
/*
 * Fallback used when CONFIG_XEN_PVHVM is not defined: the result depends
 * only on whether CONFIG_XEN is enabled in this build.
 */
static inline bool xen_has_pv_nic_devices(void)
{
return IS_ENABLED(CONFIG_XEN);
}
/*
 * Fallback used when CONFIG_XEN_PVHVM is not defined: always reports false.
 */
static inline bool xen_has_pv_and_legacy_disk_devices(void)
{
return false;
}
#endif
#endif /* _XEN_PLATFORM_PCI_H */
......@@ -29,4 +29,18 @@ extern enum xen_domain_type xen_domain_type;
#define xen_initial_domain() (0)
#endif /* CONFIG_XEN_DOM0 */
#ifdef CONFIG_XEN_PVH
/* This functionality exists only for x86. The XEN_PVHVM support exists
* only in x86 world - hence on ARM it will be always disabled.
* N.B. ARM guests are neither PV nor HVM nor PVHVM.
* It's a bit like PVH but is different also (it's further towards the H
* end of the spectrum than even PVH).
*/
#include <xen/features.h>
#define xen_pvh_domain() (xen_pv_domain() && \
xen_feature(XENFEAT_auto_translated_physmap) && \
xen_have_vector_callback)
#else
#define xen_pvh_domain() (0)
#endif
#endif /* _XEN_XEN_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment