Commit 18cb657c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'stable/xen-pcifront-0.8.2' of...

Merge branch 'stable/xen-pcifront-0.8.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
  and branch 'for-linus' of git://xenbits.xen.org/people/sstabellini/linux-pvhvm

* 'for-linus' of git://xenbits.xen.org/people/sstabellini/linux-pvhvm:
  xen: register xen pci notifier
  xen: initialize cpu masks for pv guests in xen_smp_init
  xen: add a missing #include to arch/x86/pci/xen.c
  xen: mask the MTRR feature from the cpuid
  xen: make hvc_xen console work for dom0.
  xen: add the direct mapping area for ISA bus access
  xen: Initialize xenbus for dom0.
  xen: use vcpu_ops to setup cpu masks
  xen: map a dummy page for local apic and ioapic in xen_set_fixmap
  xen: remap MSIs into pirqs when running as initial domain
  xen: remap GSIs as pirqs when running as initial domain
  xen: introduce XEN_DOM0 as a silent option
  xen: map MSIs into pirqs
  xen: support GSI -> pirq remapping in PV on HVM guests
  xen: add xen hvm acpi_register_gsi variant
  acpi: use indirect call to register gsi in different modes
  xen: implement xen_hvm_register_pirq
  xen: get the maximum number of pirqs from xen
  xen: support pirq != irq

* 'stable/xen-pcifront-0.8.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (27 commits)
  X86/PCI: Remove the dependency on isapnp_disable.
  xen: Update Makefile with CONFIG_BLOCK dependency for biomerge.c
  MAINTAINERS: Add myself to the Xen Hypervisor Interface and remove Chris Wright.
  x86: xen: Sanitse irq handling (part two)
  swiotlb-xen: On x86-32 builts, select SWIOTLB instead of depending on it.
  MAINTAINERS: Add myself for Xen PCI and Xen SWIOTLB maintainer.
  xen/pci: Request ACS when Xen-SWIOTLB is activated.
  xen-pcifront: Xen PCI frontend driver.
  xenbus: prevent warnings on unhandled enumeration values
  xenbus: Xen paravirtualised PCI hotplug support.
  xen/x86/PCI: Add support for the Xen PCI subsystem
  x86: Introduce x86_msi_ops
  msi: Introduce default_[teardown|setup]_msi_irqs with fallback.
  x86/PCI: Export pci_walk_bus function.
  x86/PCI: make sure _PAGE_IOMAP it set on pci mappings
  x86/PCI: Clean up pci_cache_line_size
  xen: fix shared irq device passthrough
  xen: Provide a variant of xen_poll_irq with timeout.
  xen: Find an unbound irq number in reverse order (high to low).
  xen: statically initialize cpu_evtchn_mask_p
  ...

Fix up trivial conflicts in drivers/pci/Makefile
parents 2301b65b e28c31a9
......@@ -6595,11 +6595,25 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.
S: Maintained
F: drivers/platform/x86
XEN PCI SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xensource.com
S: Supported
F: arch/x86/pci/*xen*
F: drivers/pci/*xen*
XEN SWIOTLB SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xensource.com
S: Supported
F: arch/x86/xen/*swiotlb*
F: drivers/xen/*swiotlb*
XEN HYPERVISOR INTERFACE
M: Jeremy Fitzhardinge <jeremy@xensource.com>
M: Chris Wright <chrisw@sous-sol.org>
M: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xen.org
L: virtualization@lists.osdl.org
L: xen-devel@lists.xensource.com
S: Supported
F: arch/x86/xen/
F: drivers/*/xen-*front.c
......
......@@ -1893,6 +1893,11 @@ config PCI_OLPC
def_bool y
depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
config PCI_XEN
def_bool y
depends on PCI && XEN
select SWIOTLB_XEN
config PCI_DOMAINS
def_bool y
depends on PCI
......
......@@ -93,6 +93,9 @@ extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity);
static inline void disable_acpi(void)
{
acpi_disabled = 1;
......
......@@ -41,6 +41,8 @@
#include <asm-generic/int-ll64.h>
#include <asm/page.h>
#include <xen/xen.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
......@@ -351,6 +353,17 @@ extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
#ifdef CONFIG_XEN
struct bio_vec;
extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
const struct bio_vec *vec2);
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
(!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
#endif /* CONFIG_XEN */
#define IO_SPACE_LIMIT 0xffff
#endif /* _ASM_X86_IO_H */
......@@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void probe_nr_irqs_gsi(void);
extern int get_nr_irqs_gsi(void);
extern void setup_ioapic_ids_from_mpc(void);
......
......@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <asm/scatterlist.h>
#include <asm/io.h>
#include <asm/x86_init.h>
#ifdef __KERNEL__
......@@ -94,8 +95,36 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
/* MSI arch hook */
#define arch_setup_msi_irqs arch_setup_msi_irqs
#ifdef CONFIG_PCI_MSI
/* MSI arch specific hooks */
static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
return x86_msi.setup_msi_irqs(dev, nvec, type);
}
static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
{
x86_msi.teardown_msi_irqs(dev);
}
static inline void x86_teardown_msi_irq(unsigned int irq)
{
x86_msi.teardown_msi_irq(irq);
}
#define arch_setup_msi_irqs x86_setup_msi_irqs
#define arch_teardown_msi_irqs x86_teardown_msi_irqs
#define arch_teardown_msi_irq x86_teardown_msi_irq
/* implemented in arch/x86/kernel/apic/io_apic.c */
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
/* default to the implementation in drivers/pci/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
void default_teardown_msi_irqs(struct pci_dev *dev);
#else
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#define default_teardown_msi_irqs NULL
#endif
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
......
......@@ -47,6 +47,7 @@ enum pci_bf_sort_state {
extern unsigned int pcibios_max_latency;
void pcibios_resource_survey(void);
void pcibios_set_cache_line_size(void);
/* pci-pc.c */
......
......@@ -154,9 +154,18 @@ struct x86_platform_ops {
int (*i8042_detect)(void);
};
struct pci_dev;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
};
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
......
#ifndef _ASM_X86_XEN_PCI_H
#define _ASM_X86_XEN_PCI_H
#if defined(CONFIG_PCI_XEN)
extern int __init pci_xen_init(void);
extern int __init pci_xen_hvm_init(void);
#define pci_xen 1
#else
#define pci_xen 0
#define pci_xen_init (0)
static inline int pci_xen_hvm_init(void)
{
return -1;
}
#endif
#if defined(CONFIG_XEN_DOM0)
void __init xen_setup_pirqs(void);
#else
static inline void __init xen_setup_pirqs(void)
{
}
#endif
#if defined(CONFIG_PCI_MSI)
#if defined(CONFIG_PCI_XEN)
/*
 * MSI/MSI-X hooks provided by the Xen PCI frontend.
 *
 * drivers/pci/xen-pcifront.c points xen_pci_frontend at a structure
 * filled with its own functions.  The xen_pci_frontend_*() wrappers in
 * this header tolerate both a NULL xen_pci_frontend and a NULL hook.
 *
 * NOTE(review): callers pass the address of an int array as @vectors and
 * read the entries back after a successful enable call - presumably the
 * hook stores one vector per interrupt there; confirm against pcifront.
 */
struct xen_pci_frontend_ops {
	int (*enable_msi)(struct pci_dev *dev, int **vectors);
	void (*disable_msi)(struct pci_dev *dev);
	int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
	void (*disable_msix)(struct pci_dev *dev);
};
extern struct xen_pci_frontend_ops *xen_pci_frontend;
/*
 * Forward an MSI enable request to the registered Xen PCI frontend.
 * Returns the hook's result, or -ENODEV when no frontend (or no
 * enable_msi hook) has been registered.
 */
static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
					      int **vectors)
{
	if (!xen_pci_frontend || !xen_pci_frontend->enable_msi)
		return -ENODEV;

	return xen_pci_frontend->enable_msi(dev, vectors);
}
/*
 * Forward an MSI disable request to the registered Xen PCI frontend.
 * Silently does nothing when no frontend (or no disable_msi hook) exists.
 */
static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
{
	if (!xen_pci_frontend || !xen_pci_frontend->disable_msi)
		return;

	xen_pci_frontend->disable_msi(dev);
}
/*
 * Forward an MSI-X enable request for @nvec vectors to the registered
 * Xen PCI frontend.  Returns the hook's result, or -ENODEV when no
 * frontend (or no enable_msix hook) has been registered.
 */
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
					       int **vectors, int nvec)
{
	if (!xen_pci_frontend || !xen_pci_frontend->enable_msix)
		return -ENODEV;

	return xen_pci_frontend->enable_msix(dev, vectors, nvec);
}
/*
 * Forward an MSI-X disable request to the registered Xen PCI frontend.
 * Silently does nothing when no frontend (or no disable_msix hook) exists.
 */
static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
{
	if (!xen_pci_frontend || !xen_pci_frontend->disable_msix)
		return;

	xen_pci_frontend->disable_msix(dev);
}
#endif /* CONFIG_PCI_XEN */
#endif /* CONFIG_PCI_MSI */
#endif /* _ASM_X86_XEN_PCI_H */
......@@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
return 0;
}
/*
* success: return IRQ number (>=0)
* failure: return < 0
*/
int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
unsigned int irq;
unsigned int plat_gsi = gsi;
#ifdef CONFIG_PCI
/*
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
if (trigger == ACPI_LEVEL_SENSITIVE)
eisa_set_level_irq(gsi);
}
if (trigger == ACPI_LEVEL_SENSITIVE)
eisa_set_level_irq(gsi);
#endif
return gsi;
}
static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
#ifdef CONFIG_X86_IO_APIC
if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
}
gsi = mp_register_gsi(dev, gsi, trigger, polarity);
#endif
return gsi;
}
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
/*
* success: return IRQ number (>=0)
* failure: return < 0
*/
int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
unsigned int irq;
unsigned int plat_gsi = gsi;
plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
irq = gsi_to_irq(plat_gsi);
return irq;
}
void __init acpi_set_irq_model_pic(void)
{
acpi_irq_model = ACPI_IRQ_MODEL_PIC;
__acpi_register_gsi = acpi_register_gsi_pic;
acpi_ioapic = 0;
}
void __init acpi_set_irq_model_ioapic(void)
{
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
__acpi_register_gsi = acpi_register_gsi_ioapic;
acpi_ioapic = 1;
}
/*
* ACPI based hotplug support for CPU
*/
......@@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void)
*/
error = acpi_parse_madt_ioapic_entries();
if (!error) {
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
acpi_ioapic = 1;
acpi_set_irq_model_ioapic();
smp_found_config = 1;
}
......
......@@ -3331,7 +3331,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
return 0;
}
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
......@@ -3389,7 +3389,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return ret;
}
void arch_teardown_msi_irq(unsigned int irq)
void native_teardown_msi_irq(unsigned int irq)
{
destroy_irq(irq);
}
......@@ -3650,6 +3650,11 @@ void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
int get_nr_irqs_gsi(void)
{
return nr_irqs_gsi;
}
#ifdef CONFIG_SPARSE_IRQ
int __init arch_probe_nr_irqs(void)
{
......
......@@ -6,10 +6,12 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
#include <asm/pci_x86.h>
#include <asm/pci.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
......@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
};
EXPORT_SYMBOL_GPL(x86_platform);
struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
};
......@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
obj-$(CONFIG_PCI_OLPC) += olpc.o
obj-$(CONFIG_PCI_XEN) += xen.o
obj-y += fixup.o
obj-$(CONFIG_ACPI) += acpi.o
......
......@@ -421,16 +421,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
return bus;
}
int __init pcibios_init(void)
void __init pcibios_set_cache_line_size(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (!raw_pci_ops) {
printk(KERN_WARNING "PCI: System does not support PCI\n");
return 0;
}
/*
* Set PCI cacheline size to that of the CPU if the CPU has reported it.
* (For older CPUs that don't support cpuid, we set it to 32 bytes
......@@ -445,7 +439,16 @@ int __init pcibios_init(void)
pci_dfl_cache_line_size = 32 >> 2;
printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
}
}
int __init pcibios_init(void)
{
if (!raw_pci_ops) {
printk(KERN_WARNING "PCI: System does not support PCI\n");
return 0;
}
pcibios_set_cache_line_size();
pcibios_resource_survey();
if (pci_bf_sort >= pci_force_bf)
......
......@@ -316,6 +316,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
*/
prot |= _PAGE_CACHE_UC_MINUS;
prot |= _PAGE_IOMAP; /* creating a mapping for IO */
vma->vm_page_prot = __pgprot(prot);
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
......
/*
* Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
* x86 PCI core to support the Xen PCI Frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <asm/io_apic.h>
#include <asm/pci_x86.h>
#include <asm/xen/hypervisor.h>
#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#ifdef CONFIG_ACPI
/*
 * Map @gsi to a pirq through the hypervisor and bind it to a Linux irq.
 *
 * Edge-triggered GSIs are registered as non-shareable "ioapic-edge"
 * interrupts, everything else as shareable "ioapic-level".
 *
 * Returns -1 when not running as a Xen HVM domain or when the
 * PHYSDEVOP_map_pirq hypercall fails; otherwise returns whatever
 * xen_map_pirq_gsi() returns.
 */
static int xen_hvm_register_pirq(u32 gsi, int triggering)
{
	struct physdev_map_pirq map_irq;
	int edge = (triggering == ACPI_EDGE_SENSITIVE);
	int irq, rc;

	if (!xen_hvm_domain())
		return -1;

	/* pirq == -1 in the request; the result is read back from map_irq.pirq */
	map_irq.domid = DOMID_SELF;
	map_irq.type = MAP_PIRQ_TYPE_GSI;
	map_irq.index = gsi;
	map_irq.pirq = -1;

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
	if (rc) {
		printk(KERN_WARNING "xen map irq failed %d\n", rc);
		return -1;
	}

	irq = xen_map_pirq_gsi(map_irq.pirq, gsi, edge ? 0 : 1,
			       edge ? "ioapic-edge" : "ioapic-level");

	printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);

	return irq;
}
/*
 * __acpi_register_gsi implementation for Xen HVM guests: registers the
 * GSI as a pirq.  @dev and @polarity are not used.
 */
static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
				     int trigger, int polarity)
{
	int irq = xen_hvm_register_pirq(gsi, trigger);

	return irq;
}
#endif
#if defined(CONFIG_PCI_MSI)
#include <linux/msi.h>
#include <asm/msidef.h>
struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);
/*
 * Build the MSI message that asks the hypervisor for a pirq.
 *
 * The vector is set to 0 to tell the hypervisor we do not care about
 * it and want a pirq set up instead; the requested pirq is carried in
 * the destination-id fields of the address.  @pdev is not used.
 */
static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
				struct msi_msg *msg)
{
	msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);

	msg->address_lo = MSI_ADDR_BASE_LO |
			  MSI_ADDR_DEST_MODE_PHYSICAL |
			  MSI_ADDR_REDIRECTION_CPU |
			  MSI_ADDR_DEST_ID(pirq);

	msg->data = MSI_DATA_TRIGGER_EDGE |
		    MSI_DATA_LEVEL_ASSERT |
		    (3 << 8) |		/* delivery mode reserved */
		    MSI_DATA_VECTOR(0);
}
/*
 * MSI/MSI-X setup for Xen HVM guests.
 *
 * For every msi_desc on @dev an irq/pirq pair is allocated, the irq is
 * attached to the descriptor, and an MSI message carrying the pirq is
 * written.  @nvec is not used; @type only selects the irq name.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	int irq, pirq, ret = 0;
	struct msi_desc *msidesc;
	struct msi_msg msg;

	list_for_each_entry(msidesc, &dev->msi_list, list) {
		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
				      "msi-x" : "msi", &irq, &pirq);
		if (irq < 0 || pirq < 0) {
			/*
			 * ret is still 0 (or the non-negative result of a
			 * previous set_irq_msi()) here; without an explicit
			 * error code the failure would be reported to the
			 * caller as success.  -1 matches the other setup
			 * paths in this file.
			 */
			ret = -1;
			goto error;
		}
		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
		xen_msi_compose_msg(dev, pirq, &msg);
		ret = set_irq_msi(irq, msidesc);
		if (ret < 0)
			goto error_while;
		write_msi_msg(irq, &msg);
	}
	return 0;

error_while:
	unbind_from_irqhandler(irq, NULL);
error:
	if (ret == -ENODEV)
		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
			" MSI/MSI-X support!\n");

	return ret;
}
/*
 * MSI/MSI-X setup for PV guests using the Xen PCI frontend.
 *
 * For MSI interrupts we have to use the drivers/xen/events.c functions
 * to allocate an irq_desc.  The frontend is asked to enable MSI or MSI-X
 * and hands back its vectors in v[]; each one is turned into a Linux irq
 * with xen_allocate_pirq() and attached to the matching msi_desc.
 *
 * Returns 0 on success and a negative value on failure.  The temporary
 * vector array is released on every exit path.
 */
static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	int irq, ret, i;
	struct msi_desc *msidesc;
	int *v;

	/* Always allocate at least one slot, even if nvec <= 0. */
	v = kcalloc(max(1, nvec), sizeof(*v), GFP_KERNEL);
	if (!v)
		return -ENOMEM;

	if (type == PCI_CAP_ID_MSIX)
		ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
	else
		ret = xen_pci_frontend_enable_msi(dev, &v);
	if (ret)
		goto error;

	i = 0;
	list_for_each_entry(msidesc, &dev->msi_list, list) {
		irq = xen_allocate_pirq(v[i], 0, /* not sharable */
					(type == PCI_CAP_ID_MSIX) ?
					"pcifront-msi-x" : "pcifront-msi");
		if (irq < 0) {
			/* Must not return directly: v would be leaked. */
			ret = -1;
			goto free;
		}
		ret = set_irq_msi(irq, msidesc);
		if (ret)
			goto error_while;
		i++;
	}
	kfree(v);
	return 0;

error_while:
	unbind_from_irqhandler(irq, NULL);
error:
	if (ret == -ENODEV)
		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
			" MSI/MSI-X support!\n");
free:
	kfree(v);
	return ret;
}
/*
 * Tear down all MSI or MSI-X interrupts of @dev through the Xen PCI
 * frontend.  The first descriptor on dev->msi_list decides which of
 * the two disable hooks is used.
 */
static void xen_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *first;

	first = list_entry(dev->msi_list.next, struct msi_desc, list);

	if (!first->msi_attrib.is_msix) {
		xen_pci_frontend_disable_msi(dev);
		return;
	}

	xen_pci_frontend_disable_msix(dev);
}
/* Per-irq MSI teardown hook: hands @irq to xen_destroy_irq(). */
static void xen_teardown_msi_irq(unsigned int irq)
{
	xen_destroy_irq(irq);
}
/*
 * MSI/MSI-X setup for the initial domain.
 *
 * Creates an irq for every msi_desc on @dev and attaches it to the
 * descriptor.  Returns 0 on success, -1 if an irq could not be created,
 * or the set_irq_msi() error (after destroying the irq just created).
 * @nvec is not used.
 */
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *desc;
	int irq, rc;

	list_for_each_entry(desc, &dev->msi_list, list) {
		irq = xen_create_msi_irq(dev, desc, type);
		if (irq < 0)
			return -1;

		rc = set_irq_msi(irq, desc);
		if (rc) {
			xen_destroy_irq(irq);
			return rc;
		}
	}

	return 0;
}
#endif
/*
 * pcibios_enable_irq hook for the Xen PCI frontend: binds dev->irq as a
 * pirq.  Legacy irqs (below NR_IRQS_LEGACY) are registered as
 * non-shareable, all others as shareable.
 *
 * Returns 0 on success, -EINVAL for a negative dev->irq, or the negative
 * value returned by xen_allocate_pirq().
 */
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
	int shareable, rc;

	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);

	/*
	 * NOTE(review): if dev->irq is an unsigned field this test can
	 * never be true - confirm against struct pci_dev.
	 */
	if (dev->irq < 0)
		return -EINVAL;

	shareable = (dev->irq < NR_IRQS_LEGACY) ? 0 : 1;

	rc = xen_allocate_pirq(dev->irq, shareable, "pcifront");
	if (rc >= 0)
		return 0;

	dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
		 dev->irq, rc);
	return rc;
}
/*
 * Install the Xen PCI frontend stubs for an unprivileged PV guest.
 *
 * Returns -ENODEV when not running as a PV domain or when running as
 * the initial domain; otherwise hooks up the irq enable path and, with
 * CONFIG_PCI_MSI, the x86_msi operations, and returns 0.
 */
int __init pci_xen_init(void)
{
	if (!xen_pv_domain() || xen_initial_domain())
		return -ENODEV;

	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");

	pcibios_set_cache_line_size();

	/* No disable hook is installed; only enabling is handled. */
	pcibios_disable_irq = NULL;
	pcibios_enable_irq = xen_pcifront_enable_irq;

#ifdef CONFIG_ACPI
	/* Keep ACPI out of the picture */
	acpi_noirq = 1;
#endif

#ifdef CONFIG_PCI_MSI
	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
#endif

	return 0;
}
/*
 * Route GSI registration and MSI setup through Xen pirqs for HVM guests
 * when the hypervisor advertises XENFEAT_hvm_pirqs.  Always returns 0.
 */
int __init pci_xen_hvm_init(void)
{
	if (xen_feature(XENFEAT_hvm_pirqs)) {
#ifdef CONFIG_ACPI
		/*
		 * We don't want to change the actual ACPI delivery model,
		 * just how GSIs get registered.
		 */
		__acpi_register_gsi = acpi_register_gsi_xen_hvm;
#endif

#ifdef CONFIG_PCI_MSI
		x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
		x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
#endif
	}

	return 0;
}
#ifdef CONFIG_XEN_DOM0
/*
 * Allocate a Linux irq for @gsi and ask the hypervisor to map the GSI
 * onto it (initial domain).
 *
 * Edge-triggered GSIs become non-shareable "ioapic-edge" interrupts,
 * everything else shareable "ioapic-level".
 *
 * Returns the irq from xen_allocate_pirq() (which may be negative), or
 * -1 when not running as a PV domain or when PHYSDEVOP_map_pirq fails.
 */
static int xen_register_pirq(u32 gsi, int triggering)
{
	struct physdev_map_pirq map_irq;
	int edge, irq, rc;

	if (!xen_pv_domain())
		return -1;

	edge = (triggering == ACPI_EDGE_SENSITIVE);

	irq = xen_allocate_pirq(gsi, edge ? 0 : 1,
				edge ? "ioapic-edge" : "ioapic-level");

	printk(KERN_DEBUG "xen: --> irq=%d\n", irq);

	if (irq < 0)
		return irq;

	map_irq.domid = DOMID_SELF;
	map_irq.type = MAP_PIRQ_TYPE_GSI;
	map_irq.index = gsi;
	map_irq.pirq = irq;

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
	if (rc) {
		printk(KERN_WARNING "xen map irq failed %d\n", rc);
		return -1;
	}

	return irq;
}
/*
 * Register @gsi as a pirq and describe its trigger mode and polarity to
 * the hypervisor with PHYSDEVOP_setup_gsi.
 *
 * A failing setup_gsi hypercall is only logged; the value returned is
 * always the result of xen_register_pirq(), or -1 when not running as a
 * PV domain.
 */
static int xen_register_gsi(u32 gsi, int triggering, int polarity)
{
	struct physdev_setup_gsi setup_gsi;
	int irq, rc;

	if (!xen_pv_domain())
		return -1;

	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
	       gsi, triggering, polarity);

	irq = xen_register_pirq(gsi, triggering);

	/* The hypercall encodes edge / active-high as 0, the rest as 1. */
	setup_gsi.gsi = gsi;
	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
	if (rc && rc != -EEXIST)
		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
		       gsi, rc);
	else if (rc == -EEXIST)
		printk(KERN_INFO "Already setup the GSI :%d\n", gsi);

	return irq;
}
/*
 * Register the ACPI SCI override GSI (acpi_sci_override_gsi) with Xen.
 * Does nothing when no override GSI is set, and only logs a warning when
 * the override's trigger/polarity cannot be looked up.
 */
static __init void xen_setup_acpi_sci(void)
{
	int gsi = acpi_sci_override_gsi;
	int trigger, polarity;
	int err;

	if (!gsi)
		return;

	err = acpi_get_override_irq(gsi, &trigger, &polarity);
	if (err) {
		printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
		       " sci, rc=%d\n", err);
		return;
	}

	/* Translate the looked-up flags into ACPI trigger/polarity codes. */
	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;

	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
	       "polarity=%d\n", gsi, trigger, polarity);

	gsi = xen_register_gsi(gsi, trigger, polarity);
	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
}
/*
 * __acpi_register_gsi implementation for the initial domain: registers
 * the GSI with Xen.  @dev is not used.
 */
static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
				 int trigger, int polarity)
{
	int irq = xen_register_gsi(gsi, trigger, polarity);

	return irq;
}
/*
 * Initial-domain PCI setup: install the dom0 MSI hooks (with
 * CONFIG_PCI_MSI), register the ACPI SCI override, and switch GSI
 * registration over to the Xen variant.  Always returns 0.
 */
static int __init pci_xen_initial_domain(void)
{
#ifdef CONFIG_PCI_MSI
	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
#endif

	xen_setup_acpi_sci();

	__acpi_register_gsi = acpi_register_gsi_xen;

	return 0;
}
/*
 * Set up pirqs for the initial domain.
 *
 * After the common dom0 PCI setup, the legacy irqs are pre-allocated:
 * without any IO-APIC they are all bound as non-shareable "xt-pic"
 * pirqs; otherwise each legacy irq for which override information is
 * available is registered with the matching trigger mode.
 */
void __init xen_setup_pirqs(void)
{
	int irq;

	pci_xen_initial_domain();

	if (nr_ioapics == 0) {
		for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
			xen_allocate_pirq(irq, 0, "xt-pic");
		return;
	}

	/* Pre-allocate legacy irqs */
	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
		int trigger, polarity;

		if (acpi_get_override_irq(irq, &trigger, &polarity) != -1)
			xen_register_pirq(irq, trigger ?
					  ACPI_LEVEL_SENSITIVE :
					  ACPI_EDGE_SENSITIVE);
	}
}
#endif
......@@ -13,6 +13,16 @@ config XEN
kernel to boot in a paravirtualized environment under the
Xen hypervisor.
config XEN_DOM0
def_bool y
depends on XEN && PCI_XEN && SWIOTLB_XEN
depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
# name in tools.
config XEN_PRIVILEGED_GUEST
def_bool XEN_DOM0
config XEN_PVHVM
def_bool y
depends on XEN
......
......@@ -46,6 +46,7 @@
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
......@@ -236,6 +237,7 @@ static __init void xen_init_cpuid_mask(void)
cpuid_leaf1_edx_mask =
~((1 << X86_FEATURE_MCE) | /* disable MCE */
(1 << X86_FEATURE_MCA) | /* disable MCA */
(1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
if (!xen_initial_domain())
......@@ -1184,6 +1186,7 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
xen_ident_map_ISA();
/* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list();
......@@ -1222,6 +1225,8 @@ asmlinkage void __init xen_start_kernel(void)
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
/* Make sure ACS will be enabled */
pci_request_acs();
......
......@@ -1975,6 +1975,7 @@ static void *m2v(phys_addr_t maddr)
return __ka(m2p(maddr));
}
/* Set the page permissions on an identity-mapped page */
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
......@@ -2159,6 +2160,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
}
#endif /* CONFIG_X86_64 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
pte_t pte;
......@@ -2178,9 +2181,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
#endif
#ifdef CONFIG_X86_LOCAL_APIC
case FIX_APIC_BASE: /* maps dummy local APIC */
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
......@@ -2188,6 +2188,22 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
pte = pfn_pte(phys, prot);
break;
#ifdef CONFIG_X86_LOCAL_APIC
case FIX_APIC_BASE: /* maps dummy local APIC */
pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
break;
#endif
#ifdef CONFIG_X86_IO_APIC
case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
/*
* We don't map the real IO APIC - all access is via
* hypercalls. Point the fixmap at the dummy page instead.
*/
pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
break;
#endif
case FIX_PARAVIRT_BOOTMAP:
/* This is an MFN, but it isn't an IO mapping from the
IO domain */
......@@ -2212,6 +2228,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
/*
 * If we're dom0, linear map the ISA machine addresses into the kernel's
 * address space, one page at a time, then flush the TLB.  Any failing
 * mapping hypercall is fatal.  Does nothing for other domains.
 */
__init void xen_ident_map_ISA(void)
{
	unsigned long addr = ISA_START_ADDRESS;

	if (!xen_initial_domain())
		return;

	xen_raw_printk("Xen: setup ISA identity maps\n");

	while (addr < ISA_END_ADDRESS) {
		pte_t isa_pte = mfn_pte(PFN_DOWN(addr), PAGE_KERNEL_IO);

		if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + addr, isa_pte, 0))
			BUG();

		addr += PAGE_SIZE;
	}

	xen_flush_tlb();
}
static __init void xen_post_allocator_init(void)
{
pv_mmu_ops.set_pte = xen_set_pte;
......@@ -2320,6 +2359,8 @@ void __init xen_init_mmu_ops(void)
pv_mmu_ops = xen_mmu_ops;
vmap_lazy_unmap = false;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
......
/* Glue code to lib/swiotlb-xen.c */
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <xen/swiotlb-xen.h>
#include <asm/xen/hypervisor.h>
......@@ -55,6 +56,9 @@ void __init pci_xen_swiotlb_init(void)
if (xen_swiotlb) {
xen_swiotlb_init(1);
dma_ops = &xen_swiotlb_dma_ops;
/* Make sure ACS will be enabled */
pci_request_acs();
}
}
IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
......
......@@ -204,6 +204,9 @@ char * __init xen_memory_setup(void)
* Even though this is normal, usable memory under Xen, reserve
* ISA memory anyway because too many things think they can poke
* about in there.
*
* In a dom0 kernel, this region is identity mapped with the
* hardware ISA area, so it really is out of bounds.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
......@@ -367,7 +370,5 @@ void __init xen_arch_setup(void)
pm_idle = xen_idle;
paravirt_disable_iospace();
fiddle_vdso();
}
......@@ -28,6 +28,7 @@
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>
......@@ -156,11 +157,35 @@ static void __init xen_fill_possible_map(void)
{
int i, rc;
if (xen_initial_domain())
return;
for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
set_cpu_possible(i, true);
}
}
}
/*
 * Initial domain only: rebuild the possible/present cpu masks from what
 * the hypervisor reports.  The processor counters are reset, then every
 * cpu id for which VCPUOP_is_up does not return an error is counted and
 * marked possible; all others are removed from both masks.
 */
static void __init xen_filter_cpu_maps(void)
{
	int cpu;

	if (!xen_initial_domain())
		return;

	num_processors = 0;
	disabled_cpus = 0;

	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL) < 0) {
			set_cpu_possible(cpu, false);
			set_cpu_present(cpu, false);
			continue;
		}

		num_processors++;
		set_cpu_possible(cpu, true);
	}
}
......@@ -174,6 +199,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
old memory can be recycled */
make_lowmem_page_readwrite(xen_initial_gdt);
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
......
......@@ -1112,6 +1112,8 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
case XenbusStateUnknown:
case XenbusStateClosed:
break;
......
......@@ -79,7 +79,7 @@ static int __write_console(const char *data, int len)
return sent;
}
static int write_console(uint32_t vtermno, const char *data, int len)
static int domU_write_console(uint32_t vtermno, const char *data, int len)
{
int ret = len;
......@@ -102,7 +102,7 @@ static int write_console(uint32_t vtermno, const char *data, int len)
return ret;
}
static int read_console(uint32_t vtermno, char *buf, int len)
static int domU_read_console(uint32_t vtermno, char *buf, int len)
{
struct xencons_interface *intf = xencons_interface();
XENCONS_RING_IDX cons, prod;
......@@ -123,28 +123,62 @@ static int read_console(uint32_t vtermno, char *buf, int len)
return recv;
}
static const struct hv_ops hvc_ops = {
.get_chars = read_console,
.put_chars = write_console,
static struct hv_ops domU_hvc_ops = {
.get_chars = domU_read_console,
.put_chars = domU_write_console,
.notifier_add = notifier_add_irq,
.notifier_del = notifier_del_irq,
.notifier_hangup = notifier_hangup_irq,
};
static int __init xen_init(void)
static int dom0_read_console(uint32_t vtermno, char *buf, int len)
{
return HYPERVISOR_console_io(CONSOLEIO_read, len, buf);
}
/*
* Either for a dom0 to write to the system console, or a domU with a
* debug version of Xen
*/
static int dom0_write_console(uint32_t vtermno, const char *str, int len)
{
int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
if (rc < 0)
return 0;
return len;
}
static struct hv_ops dom0_hvc_ops = {
.get_chars = dom0_read_console,
.put_chars = dom0_write_console,
.notifier_add = notifier_add_irq,
.notifier_del = notifier_del_irq,
.notifier_hangup = notifier_hangup_irq,
};
static int __init xen_hvc_init(void)
{
struct hvc_struct *hp;
struct hv_ops *ops;
if (!xen_pv_domain() ||
xen_initial_domain() ||
!xen_start_info->console.domU.evtchn)
if (!xen_pv_domain())
return -ENODEV;
xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
if (xen_initial_domain()) {
ops = &dom0_hvc_ops;
xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0);
} else {
if (!xen_start_info->console.domU.evtchn)
return -ENODEV;
ops = &domU_hvc_ops;
xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
}
if (xencons_irq < 0)
xencons_irq = 0; /* NO_IRQ */
hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256);
if (IS_ERR(hp))
return PTR_ERR(hp);
......@@ -161,7 +195,7 @@ void xen_console_resume(void)
rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
}
static void __exit xen_fini(void)
static void __exit xen_hvc_fini(void)
{
if (hvc)
hvc_remove(hvc);
......@@ -169,29 +203,24 @@ static void __exit xen_fini(void)
static int xen_cons_init(void)
{
struct hv_ops *ops;
if (!xen_pv_domain())
return 0;
hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
if (xen_initial_domain())
ops = &dom0_hvc_ops;
else
ops = &domU_hvc_ops;
hvc_instantiate(HVC_COOKIE, 0, ops);
return 0;
}
module_init(xen_init);
module_exit(xen_fini);
module_init(xen_hvc_init);
module_exit(xen_hvc_fini);
console_initcall(xen_cons_init);
static void raw_console_write(const char *str, int len)
{
while(len > 0) {
int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
if (rc <= 0)
break;
str += rc;
len -= rc;
}
}
#ifdef CONFIG_EARLY_PRINTK
static void xenboot_write_console(struct console *console, const char *string,
unsigned len)
......@@ -199,19 +228,22 @@ static void xenboot_write_console(struct console *console, const char *string,
unsigned int linelen, off = 0;
const char *pos;
raw_console_write(string, len);
dom0_write_console(0, string, len);
if (xen_initial_domain())
return;
write_console(0, "(early) ", 8);
domU_write_console(0, "(early) ", 8);
while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
linelen = pos-string+off;
if (off + linelen > len)
break;
write_console(0, string+off, linelen);
write_console(0, "\r\n", 2);
domU_write_console(0, string+off, linelen);
domU_write_console(0, "\r\n", 2);
off += linelen + 1;
}
if (off < len)
write_console(0, string+off, len-off);
domU_write_console(0, string+off, len-off);
}
struct console xenboot_console = {
......@@ -223,7 +255,7 @@ struct console xenboot_console = {
void xen_raw_console_write(const char *str)
{
raw_console_write(str, strlen(str));
dom0_write_console(0, str, strlen(str));
}
void xen_raw_printk(const char *fmt, ...)
......
......@@ -276,6 +276,8 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
case XenbusStateUnknown:
case XenbusStateClosed:
break;
......
......@@ -1610,6 +1610,8 @@ static void netback_changed(struct xenbus_device *dev,
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
case XenbusStateConnected:
case XenbusStateUnknown:
case XenbusStateClosed:
......
......@@ -40,6 +40,27 @@ config PCI_STUB
When in doubt, say N.
config XEN_PCIDEV_FRONTEND
tristate "Xen PCI Frontend"
depends on PCI && X86 && XEN
select HOTPLUG
select PCI_XEN
default y
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
config XEN_PCIDEV_FE_DEBUG
bool "Xen PCI Frontend debugging"
depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG
help
Say Y here if you want the Xen PCI frontend to produce a bunch of debug
messages to the system log. Select this if you are having a
problem with Xen PCI frontend support and want to see more of what is
going on.
When in doubt, say N.
config HT_IRQ
bool "Interrupts on hypertransport devices"
default y
......
......@@ -65,4 +65,6 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
obj-$(CONFIG_PCI_STUB) += pci-stub.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
......@@ -342,6 +342,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
}
up_read(&pci_bus_sem);
}
EXPORT_SYMBOL_GPL(pci_walk_bus);
EXPORT_SYMBOL(pci_bus_alloc_resource);
EXPORT_SYMBOL_GPL(pci_bus_add_device);
......
......@@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
#endif
#ifndef arch_setup_msi_irqs
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
# define arch_setup_msi_irqs default_setup_msi_irqs
# define HAVE_DEFAULT_MSI_SETUP_IRQS
#endif
#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS
int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
struct msi_desc *entry;
int ret;
......@@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
#endif
#ifndef arch_teardown_msi_irqs
void arch_teardown_msi_irqs(struct pci_dev *dev)
# define arch_teardown_msi_irqs default_teardown_msi_irqs
# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
#endif
#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS
void default_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *entry;
......
/*
* Xen PCI Frontend.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pciif.h>
#include <asm/xen/pci.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
/* Sentinel stored in pcifront_device.gnt_ref while no grant has been made. */
#define INVALID_GRANT_REF (0)
/* Sentinel stored in pcifront_device.evtchn while no channel is allocated. */
#define INVALID_EVTCHN (-1)
/* List node tracking one root PCI bus created by this frontend. */
struct pci_bus_entry {
/* linked into pcifront_device.root_buses */
struct list_head list;
struct pci_bus *bus;
};
/*
 * Bit number (and mask) in pcifront_device.flags that is set while an AER
 * work item has been scheduled and not yet finished.
 */
#define _PDEVB_op_active (0)
#define PDEVB_op_active (1 << (_PDEVB_op_active))
/* Per-frontend state; one instance is allocated by alloc_pdev() per probe. */
struct pcifront_device {
struct xenbus_device *xdev;
/* pci_bus_entry nodes, one per root bus created by this frontend */
struct list_head root_buses;
/* event channel shared with the backend; INVALID_EVTCHN until allocated */
int evtchn;
/* grant reference for the sh_info page; INVALID_GRANT_REF until granted */
int gnt_ref;
/* irq bound to evtchn; -1 until bound */
int irq;
/* Lock this when doing any operations in sh_info */
spinlock_t sh_info_lock;
struct xen_pci_sharedinfo *sh_info;
/* deferred AER servicing; the work function is pcifront_do_aer() */
struct work_struct op_work;
/* holds the _PDEVB_op_active bit */
unsigned long flags;
};
/*
 * Per-bus private data: passed as sysdata when a root bus is created and
 * read back from bus->sysdata by the config-space and MSI helpers.
 */
struct pcifront_sd {
int domain;
struct pcifront_device *pdev;
};
/* Return the frontend device recorded in a bus's private data. */
static inline struct pcifront_device *
pcifront_get_pdev(struct pcifront_sd *sd)
{
	struct pcifront_device *owner = sd->pdev;

	return owner;
}
/*
 * Fill in the per-bus private data for a newly created root bus.
 * The @bus argument is accepted but not stored.
 */
static inline void pcifront_init_sd(struct pcifront_sd *sd,
				    unsigned int domain, unsigned int bus,
				    struct pcifront_device *pdev)
{
	sd->pdev = pdev;
	sd->domain = domain;
}
/* Taken around every read or write of pcifront_dev. */
static DEFINE_SPINLOCK(pcifront_dev_lock);
/* The currently installed frontend, or NULL; set by pcifront_connect(). */
static struct pcifront_device *pcifront_dev;
/* When non-zero, config-space reads and writes are logged via dev_info(). */
static int verbose_request;
module_param(verbose_request, int, 0644);
/*
 * Map a XEN_PCI_ERR_* status onto the matching PCIBIOS_* code.
 * Values with no mapping are handed back unchanged.
 */
static int errno_to_pcibios_err(int errno)
{
	if (errno == XEN_PCI_ERR_success)
		return PCIBIOS_SUCCESSFUL;

	if (errno == XEN_PCI_ERR_dev_not_found)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (errno == XEN_PCI_ERR_invalid_offset ||
	    errno == XEN_PCI_ERR_op_failed)
		return PCIBIOS_BAD_REGISTER_NUMBER;

	if (errno == XEN_PCI_ERR_not_implemented)
		return PCIBIOS_FUNC_NOT_SUPPORTED;

	if (errno == XEN_PCI_ERR_access_denied)
		return PCIBIOS_SET_FAILED;

	return errno;
}
/*
 * Queue the AER work item if the backend has flagged a request
 * (_XEN_PCIB_active) and no work item is already outstanding
 * (_PDEVB_op_active was clear and is now set by us).
 */
static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
{
	unsigned long *shared_flags = (unsigned long *)&pdev->sh_info->flags;

	/* Nothing requested by the backend. */
	if (!test_bit(_XEN_PCIB_active, shared_flags))
		return;

	/* Somebody else already claimed the work slot. */
	if (test_and_set_bit(_PDEVB_op_active, &pdev->flags))
		return;

	dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
	schedule_work(&pdev->op_work);
}
/*
 * Send one request to the backend through the shared page and poll for the
 * reply.
 *
 * @pdev: frontend device whose shared page, event channel and irq are used
 * @op:   request to send; overwritten with the reply when one arrives
 *
 * Returns op->err from the reply, or XEN_PCI_ERR_dev_not_found if the
 * backend has not cleared _XEN_PCIF_active once the 2 second deadline has
 * passed. The whole exchange runs under sh_info_lock with local interrupts
 * disabled (spin_lock_irqsave).
 */
static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
{
int err = 0;
struct xen_pci_op *active_op = &pdev->sh_info->op;
unsigned long irq_flags;
evtchn_port_t port = pdev->evtchn;
unsigned irq = pdev->irq;
s64 ns, ns_timeout;
struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
/* Place the request in the shared page. */
memcpy(active_op, op, sizeof(struct xen_pci_op));
/* Go */
/* Write barrier between filling in the request and raising the flag. */
wmb();
set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_evtchn(port);
/*
 * We set a poll timeout of 3 seconds but give up on return after
 * 2 seconds. It is better to time out too late rather than too early
 * (in the latter case we end up continually re-executing poll() with a
 * timeout in the past). 1s difference gives plenty of slack for error.
 */
do_gettimeofday(&tv);
ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
xen_clear_irq_pending(irq);
/* Loop until _XEN_PCIF_active is no longer set in the shared flags. */
while (test_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags)) {
xen_poll_irq_timeout(irq, jiffies + 3*HZ);
xen_clear_irq_pending(irq);
do_gettimeofday(&tv);
ns = timeval_to_ns(&tv);
if (ns > ns_timeout) {
dev_err(&pdev->xdev->dev,
"pciback not responding!!!\n");
/* Withdraw the request; @op is left untouched on this path. */
clear_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags);
err = XEN_PCI_ERR_dev_not_found;
goto out;
}
}
/*
 * We might lose backend service request since we
 * reuse same evtchn with pci_conf backend response. So re-schedule
 * aer pcifront service.
 */
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&pdev->sh_info->flags)) {
dev_err(&pdev->xdev->dev,
"schedule aer pcifront service\n");
schedule_pcifront_aer_op(pdev);
}
/* Copy the reply back to the caller and report its status. */
memcpy(op, active_op, sizeof(struct xen_pci_op));
err = op->err;
out:
spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
return err;
}
/*
 * Config-space read for buses owned by the frontend: forwards the access to
 * the backend as a XEN_PCI_OP_conf_read request.
 *
 * Access to this function is spinlocked in drivers/pci/access.c
 */
static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
			     int where, int size, u32 *val)
{
	struct pcifront_sd *sd = bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_conf_read,
		.domain = pci_domain_nr(bus),
		.bus    = bus->number,
		.devfn  = devfn,
		.offset = where,
		.size   = size,
	};
	int rc;

	if (verbose_request)
		dev_info(&pdev->xdev->dev,
			 "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
			 pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
			 PCI_FUNC(devfn), where, size);

	rc = do_pci_op(pdev, &op);
	if (unlikely(rc)) {
		if (rc == -ENODEV) {
			/* No device here, pretend that it just returned 0 */
			rc = 0;
			*val = 0;
		}
		return errno_to_pcibios_err(rc);
	}

	if (verbose_request)
		dev_info(&pdev->xdev->dev, "read got back value %x\n",
			 op.value);
	*val = op.value;

	return errno_to_pcibios_err(rc);
}
/*
 * Config-space write for buses owned by the frontend: forwards the access to
 * the backend as a XEN_PCI_OP_conf_write request.
 *
 * Access to this function is spinlocked in drivers/pci/access.c
 */
static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
			      int where, int size, u32 val)
{
	struct pcifront_sd *sd = bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_conf_write,
		.domain = pci_domain_nr(bus),
		.bus    = bus->number,
		.devfn  = devfn,
		.offset = where,
		.size   = size,
		.value  = val,
	};
	int rc;

	if (verbose_request)
		dev_info(&pdev->xdev->dev,
			 "write dev=%04x:%02x:%02x.%01x - "
			 "offset %x size %d val %x\n",
			 pci_domain_nr(bus), bus->number,
			 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);

	rc = do_pci_op(pdev, &op);

	return errno_to_pcibios_err(rc);
}
/* Config-space accessors passed to pci_scan_bus_parented() for frontend buses. */
struct pci_ops pcifront_bus_ops = {
.read = pcifront_bus_read,
.write = pcifront_bus_write,
};
#ifdef CONFIG_PCI_MSI
/*
 * Ask the backend to enable MSI-X on @dev.
 *
 * @vector: points at an array that receives @nvec vectors on success
 * @nvec:   number of vectors requested; must not exceed SH_INFO_MAX_VEC
 *
 * Returns 0 on success, -EINVAL if too many vectors were requested, the
 * do_pci_op() status if the request failed, or the non-zero op.value handed
 * back by the backend.
 */
static int pci_frontend_enable_msix(struct pci_dev *dev,
				    int **vector, int nvec)
{
	struct pcifront_sd *sd = dev->bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_enable_msix,
		.domain = pci_domain_nr(dev->bus),
		.bus    = dev->bus->number,
		.devfn  = dev->devfn,
		.value  = nvec,
	};
	struct msi_desc *entry;
	int idx = 0;
	int err;

	if (nvec > SH_INFO_MAX_VEC) {
		dev_err(&dev->dev, "too much vector for pci frontend: %x."
			" Increase SH_INFO_MAX_VEC.\n", nvec);
		return -EINVAL;
	}

	/* Describe every MSI-X entry of the device to the backend. */
	list_for_each_entry(entry, &dev->msi_list, list) {
		op.msix_entries[idx].entry = entry->msi_attrib.entry_nr;
		/* Vector is useless at this point. */
		op.msix_entries[idx].vector = -1;
		idx++;
	}

	err = do_pci_op(pdev, &op);
	if (unlikely(err)) {
		dev_err(&dev->dev, "enable msix get err %x\n", err);
		return err;
	}

	if (unlikely(op.value)) {
		printk(KERN_DEBUG "enable msix get value %x\n",
		       op.value);
		return op.value;
	}

	/* we get the result */
	for (idx = 0; idx < nvec; idx++)
		(*vector)[idx] = op.msix_entries[idx].vector;

	return 0;
}
/* Ask the backend to disable MSI-X on @dev; a failure is only logged. */
static void pci_frontend_disable_msix(struct pci_dev *dev)
{
	struct pcifront_sd *sd = dev->bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_disable_msix,
		.domain = pci_domain_nr(dev->bus),
		.bus    = dev->bus->number,
		.devfn  = dev->devfn,
	};
	int rc = do_pci_op(pdev, &op);

	/* What should do for error ? */
	if (!rc)
		return;

	dev_err(&dev->dev, "pci_disable_msix get err %x\n", rc);
}
/*
 * Ask the backend to enable MSI on @dev.
 *
 * On success the value returned by the backend is stored in **vector and 0
 * is returned; any failure is logged and reported as -EINVAL.
 */
static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
{
	struct pcifront_sd *sd = dev->bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_enable_msi,
		.domain = pci_domain_nr(dev->bus),
		.bus    = dev->bus->number,
		.devfn  = dev->devfn,
	};

	if (unlikely(do_pci_op(pdev, &op))) {
		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
			"%x:%x\n", op.bus, op.devfn);
		return -EINVAL;
	}

	**vector = op.value;
	return 0;
}
/* Ask the backend to disable MSI on @dev; failures are only logged. */
static void pci_frontend_disable_msi(struct pci_dev *dev)
{
	struct pcifront_sd *sd = dev->bus->sysdata;
	struct pcifront_device *pdev = pcifront_get_pdev(sd);
	struct xen_pci_op op = {
		.cmd    = XEN_PCI_OP_disable_msi,
		.domain = pci_domain_nr(dev->bus),
		.bus    = dev->bus->number,
		.devfn  = dev->devfn,
	};
	int rc = do_pci_op(pdev, &op);

	if (rc == XEN_PCI_ERR_dev_not_found) {
		/* XXX No response from backend, what shall we do? */
		printk(KERN_DEBUG "get no response from backend for disable MSI\n");
	} else if (rc) {
		/* how can pciback notify us fail? */
		printk(KERN_DEBUG "get fake response frombackend\n");
	}
}
/* MSI/MSI-X callbacks published through xen_pci_frontend by pci_frontend_registrar(). */
static struct xen_pci_frontend_ops pci_frontend_ops = {
.enable_msi = pci_frontend_enable_msi,
.disable_msi = pci_frontend_disable_msi,
.enable_msix = pci_frontend_enable_msix,
.disable_msix = pci_frontend_disable_msix,
};
/*
 * Publish (enable != 0) or withdraw (enable == 0) the frontend MSI callbacks
 * by pointing xen_pci_frontend at pci_frontend_ops or at NULL.
 */
static void pci_frontend_registrar(int enable)
{
	xen_pci_frontend = enable ? &pci_frontend_ops : NULL;
}
#else
/* Built without CONFIG_PCI_MSI: there are no MSI callbacks to register. */
static inline void pci_frontend_registrar(int enable) { };
#endif /* CONFIG_PCI_MSI */
/*
 * pci_walk_bus() callback: claim every populated, not yet parented resource
 * of @dev as-is (the backend won't allow changes). @data is the owning
 * pcifront_device, used only for logging. Always returns 0; a resource that
 * cannot be claimed is reported but does not stop the walk.
 */
static int pcifront_claim_resource(struct pci_dev *dev, void *data)
{
	struct pcifront_device *pdev = data;
	int idx;

	for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
		struct resource *res = &dev->resource[idx];

		/* Skip resources that are already claimed or unpopulated. */
		if (res->parent || !res->start || !res->flags)
			continue;

		dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
			 pci_name(dev), idx);

		if (!pci_claim_resource(dev, idx))
			continue;

		dev_err(&pdev->xdev->dev, "Could not claim "
			"resource %s/%d! Device offline. Try "
			"giving less than 4GB to domain.\n",
			pci_name(dev), idx);
	}

	return 0;
}
/*
 * Probe all 256 devfn values on bus @b and add any function that is not
 * already known. @domain and @bus are used only in the log message.
 * Always returns 0.
 *
 * We omit handling of PCI bridge attachment because pciback prevents
 * bridges from being exported.
 */
static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
				       unsigned int domain, unsigned int bus,
				       struct pci_bus *b)
{
	unsigned int devfn;

	for (devfn = 0; devfn < 0x100; devfn++) {
		struct pci_dev *found = pci_get_slot(b, devfn);

		if (found) {
			/* Device is already known. */
			pci_dev_put(found);
			continue;
		}

		found = pci_scan_single_device(b, devfn);
		if (!found)
			continue;

		dev_info(&pdev->xdev->dev, "New device on "
			 "%04x:%02x:%02x.%02x found.\n", domain, bus,
			 PCI_SLOT(devfn), PCI_FUNC(devfn));
	}

	return 0;
}
/*
 * Create root bus @domain:@bus for this frontend, record it on
 * pdev->root_buses, enumerate its devices, claim their resources and make
 * them visible to the rest of the kernel.
 *
 * Returns the pcifront_scan_bus() result on success, -EINVAL for a non-zero
 * domain when CONFIG_PCI_DOMAINS is not set, or -ENOMEM if an allocation or
 * the bus creation fails.
 */
static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
					unsigned int domain, unsigned int bus)
{
	struct pci_bus_entry *entry = NULL;
	struct pcifront_sd *sysdata = NULL;
	struct pci_bus *root;
	int err = 0;

#ifndef CONFIG_PCI_DOMAINS
	if (domain != 0) {
		dev_err(&pdev->xdev->dev,
			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
		dev_err(&pdev->xdev->dev,
			"Please compile with CONFIG_PCI_DOMAINS\n");
		err = -EINVAL;
		goto err_out;
	}
#endif

	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
		 domain, bus);

	/* Allocate both bookkeeping objects, then check them together. */
	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	sysdata = kmalloc(sizeof(*sysdata), GFP_KERNEL);
	if (!entry || !sysdata) {
		err = -ENOMEM;
		goto err_out;
	}
	pcifront_init_sd(sysdata, domain, bus, pdev);

	root = pci_scan_bus_parented(&pdev->xdev->dev, bus,
				     &pcifront_bus_ops, sysdata);
	if (!root) {
		dev_err(&pdev->xdev->dev,
			"Error creating PCI Frontend Bus!\n");
		err = -ENOMEM;
		goto err_out;
	}

	entry->bus = root;
	list_add(&entry->list, &pdev->root_buses);

	/*
	 * pci_scan_bus_parented skips devices which do not have a
	 * devfn==0. The pcifront_scan_bus enumerates all devfn.
	 */
	err = pcifront_scan_bus(pdev, domain, bus, root);

	/* Claim resources before going "live" with our devices */
	pci_walk_bus(root, pcifront_claim_resource, pdev);

	/* Create SysFS and notify udev of the devices. Aka: "going live" */
	pci_bus_add_devices(root);

	return err;

err_out:
	kfree(entry);
	kfree(sysdata);
	return err;
}
/*
 * Re-enumerate root bus @domain:@bus. If the bus does not exist yet it is
 * created via pcifront_scan_root(); otherwise new functions are added,
 * their resources claimed and the devices made visible.
 *
 * Returns -EINVAL for a non-zero domain when CONFIG_PCI_DOMAINS is not set,
 * otherwise the result of the scan.
 */
static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
					  unsigned int domain, unsigned int bus)
{
	struct pci_bus *root;
	int rc;

#ifndef CONFIG_PCI_DOMAINS
	if (domain != 0) {
		dev_err(&pdev->xdev->dev,
			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
		dev_err(&pdev->xdev->dev,
			"Please compile with CONFIG_PCI_DOMAINS\n");
		return -EINVAL;
	}
#endif

	dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
		 domain, bus);

	root = pci_find_bus(domain, bus);
	if (!root) {
		/* If the bus is unknown, create it. */
		return pcifront_scan_root(pdev, domain, bus);
	}

	rc = pcifront_scan_bus(pdev, domain, bus, root);

	/* Claim resources before going "live" with our devices */
	pci_walk_bus(root, pcifront_claim_resource, pdev);

	/* Create SysFS and notify udev of the devices. Aka: "going live" */
	pci_bus_add_devices(root);

	return rc;
}
static void free_root_bus_devs(struct pci_bus *bus)
{
struct pci_dev *dev;
while (!list_empty(&bus->devices)) {
dev = container_of(bus->devices.next, struct pci_dev,
bus_list);
dev_dbg(&dev->dev, "removing device\n");
pci_remove_bus_device(dev);
}
}
/*
 * Tear down every root bus recorded on pdev->root_buses: remove its
 * devices, free its sysdata, unregister the bridge device, remove the bus
 * and free the list node.
 */
static void pcifront_free_roots(struct pcifront_device *pdev)
{
	struct pci_bus_entry *entry, *next;

	dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");

	list_for_each_entry_safe(entry, next, &pdev->root_buses, list) {
		struct pci_bus *root = entry->bus;

		list_del(&entry->list);

		free_root_bus_devs(root);

		kfree(root->sysdata);

		device_unregister(root->bridge);
		pci_remove_bus(root);

		kfree(entry);
	}
}
/*
 * Dispatch one AER request from the backend to the error handlers of the
 * driver bound to the affected device.
 *
 * @cmd:   XEN_PCI_OP_aer_* request code
 * @pdev:  frontend device; the target bus/devfn are read from
 *         pdev->sh_info->aer_op
 * @state: channel state passed to the driver's error_detected() callback
 *
 * Returns the driver callback's result, or PCI_ERS_RESULT_NONE when the
 * device cannot be found, has no driver, the driver has no usable
 * err_handler, or the requested callback is not implemented.
 */
static pci_ers_result_t pcifront_common_process(int cmd,
						struct pcifront_device *pdev,
						pci_channel_state_t state)
{
	pci_ers_result_t result = PCI_ERS_RESULT_NONE;
	struct pci_driver *pdrv;
	int bus = pdev->sh_info->aer_op.bus;
	int devfn = pdev->sh_info->aer_op.devfn;
	struct pci_dev *pcidev;
	int flag = 0;

	dev_dbg(&pdev->xdev->dev,
		"pcifront AER process: cmd %x (bus:%x, devfn%x)",
		cmd, bus, devfn);

	/* Takes a reference on the device; dropped on every exit path. */
	pcidev = pci_get_bus_and_slot(bus, devfn);
	if (!pcidev || !pcidev->driver) {
		/*
		 * pcidev may be NULL here, so log against the xenbus device
		 * rather than dereferencing it.
		 */
		dev_err(&pdev->xdev->dev,
			"device or driver is NULL\n");
		if (pcidev)
			pci_dev_put(pcidev);
		return result;
	}
	pdrv = pcidev->driver;

	if (get_driver(&pdrv->driver)) {
		if (pdrv->err_handler && pdrv->err_handler->error_detected) {
			dev_dbg(&pcidev->dev,
				"trying to call AER service\n");
			flag = 1;
			switch (cmd) {
			case XEN_PCI_OP_aer_detected:
				result = pdrv->err_handler->
					 error_detected(pcidev, state);
				break;
			case XEN_PCI_OP_aer_mmio:
				/* Callback not guaranteed to be set; check first. */
				if (pdrv->err_handler->mmio_enabled)
					result = pdrv->err_handler->
						 mmio_enabled(pcidev);
				break;
			case XEN_PCI_OP_aer_slotreset:
				if (pdrv->err_handler->slot_reset)
					result = pdrv->err_handler->
						 slot_reset(pcidev);
				break;
			case XEN_PCI_OP_aer_resume:
				if (pdrv->err_handler->resume)
					pdrv->err_handler->resume(pcidev);
				break;
			default:
				dev_err(&pdev->xdev->dev,
					"bad request in aer recovery "
					"operation!\n");
			}
		}
		put_driver(&pdrv->driver);
	}
	if (!flag)
		result = PCI_ERS_RESULT_NONE;

	pci_dev_put(pcidev);

	return result;
}
/*
 * Work function for pdev->op_work: services the AER request found in
 * sh_info->aer_op, writes the result into aer_op.err, clears
 * _XEN_PCIB_active and notifies the backend. It then drops
 * _PDEVB_op_active and re-checks for a request that arrived meanwhile.
 */
static void pcifront_do_aer(struct work_struct *data)
{
struct pcifront_device *pdev =
container_of(data, struct pcifront_device, op_work);
int cmd = pdev->sh_info->aer_op.cmd;
/* aer_op.err carries the channel state on input and the result on output */
pci_channel_state_t state =
(pci_channel_state_t)pdev->sh_info->aer_op.err;
/*If a pci_conf op is in progress,
we have to wait until it is done before service aer op*/
dev_dbg(&pdev->xdev->dev,
"pcifront service aer bus %x devfn %x\n",
pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
/* Post the operation to the guest. */
/* Write barrier between storing the result and clearing the request flag. */
wmb();
clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_evtchn(pdev->evtchn);
/*in case of we lost an aer request in four lines time_window*/
smp_mb__before_clear_bit();
clear_bit(_PDEVB_op_active, &pdev->flags);
smp_mb__after_clear_bit();
/* Re-arm: schedules the work again if _XEN_PCIB_active is set again. */
schedule_pcifront_aer_op(pdev);
}
/*
 * Interrupt handler for the frontend's event channel: defers any pending
 * AER request to the work item. @dev is the pcifront_device passed at
 * bind time.
 */
static irqreturn_t pcifront_handler_aer(int irq, void *dev)
{
	schedule_pcifront_aer_op((struct pcifront_device *)dev);

	return IRQ_HANDLED;
}
/*
 * Install @pdev as the active frontend. Only one frontend may be installed
 * at a time: returns 0 on success or -EEXIST if one is already present.
 */
static int pcifront_connect(struct pcifront_device *pdev)
{
	int rc;

	spin_lock(&pcifront_dev_lock);

	if (pcifront_dev) {
		dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
		rc = -EEXIST;
	} else {
		dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
		pcifront_dev = pdev;
		rc = 0;
	}

	spin_unlock(&pcifront_dev_lock);

	return rc;
}
/* Uninstall @pdev if it is the currently installed frontend. */
static void pcifront_disconnect(struct pcifront_device *pdev)
{
	spin_lock(&pcifront_dev_lock);

	if (pcifront_dev == pdev) {
		dev_info(&pdev->xdev->dev,
			 "Disconnecting PCI Frontend Buses\n");
		pcifront_dev = NULL;
	}

	spin_unlock(&pcifront_dev_lock);
}
/*
 * Allocate and initialise the frontend state for @xdev, including the page
 * used as shared info, and store it as the device's driver data.
 *
 * Returns the new pcifront_device, or NULL if either allocation fails.
 */
static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
{
	struct pcifront_device *fresh;

	fresh = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
	if (!fresh)
		return NULL;

	fresh->sh_info =
		(struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
	if (!fresh->sh_info) {
		kfree(fresh);
		return NULL;
	}
	fresh->sh_info->flags = 0;

	/*Flag for registering PV AER handler*/
	set_bit(_XEN_PCIB_AERHANDLER, (void *)&fresh->sh_info->flags);

	dev_set_drvdata(&xdev->dev, fresh);
	fresh->xdev = xdev;

	INIT_LIST_HEAD(&fresh->root_buses);

	spin_lock_init(&fresh->sh_info_lock);

	/* Nothing is set up with the backend yet. */
	fresh->evtchn = INVALID_EVTCHN;
	fresh->gnt_ref = INVALID_GRANT_REF;
	fresh->irq = -1;

	INIT_WORK(&fresh->op_work, pcifront_do_aer);

	dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
		fresh, fresh->sh_info);

	return fresh;
}
/*
 * Release everything owned by @pdev: root buses, pending AER work, the irq
 * binding, the event channel, the shared page (through the grant if one was
 * made, directly otherwise) and finally the structure itself.
 */
static void free_pdev(struct pcifront_device *pdev)
{
	dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);

	pcifront_free_roots(pdev);

	/*For PCIE_AER error handling job*/
	flush_scheduled_work();

	if (pdev->irq >= 0)
		unbind_from_irqhandler(pdev->irq, pdev);

	if (pdev->evtchn != INVALID_EVTCHN)
		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);

	if (pdev->gnt_ref == INVALID_GRANT_REF) {
		/* Page was never granted: free it directly. */
		free_page((unsigned long)pdev->sh_info);
	} else {
		gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
					  (unsigned long)pdev->sh_info);
	}

	dev_set_drvdata(&pdev->xdev->dev, NULL);

	kfree(pdev);
}
/*
 * Grant the shared page, allocate and bind the event channel, then publish
 * "pci-op-ref", "event-channel" and "magic" under the device's xenstore
 * node and switch to XenbusStateInitialised. The xenstore transaction is
 * retried for as long as it ends with -EAGAIN.
 *
 * Returns 0 on success or a negative error; resources already acquired are
 * left recorded in @pdev for the caller to release.
 */
static int pcifront_publish_info(struct pcifront_device *pdev)
{
	struct xenbus_transaction trans;
	int err;

	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
	if (err < 0)
		return err;
	pdev->gnt_ref = err;

	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
	if (err)
		return err;

	err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
					0, "pcifront", pdev);
	if (err < 0)
		return err;
	pdev->irq = err;

	for (;;) {
		err = xenbus_transaction_start(&trans);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error writing configuration for backend "
					 "(start transaction)");
			return err;
		}

		err = xenbus_printf(trans, pdev->xdev->nodename,
				    "pci-op-ref", "%u", pdev->gnt_ref);
		if (!err)
			err = xenbus_printf(trans, pdev->xdev->nodename,
					    "event-channel", "%u", pdev->evtchn);
		if (!err)
			err = xenbus_printf(trans, pdev->xdev->nodename,
					    "magic", XEN_PCI_MAGIC);

		if (err) {
			/* Abort the transaction before reporting. */
			xenbus_transaction_end(trans, 1);
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error writing configuration for backend");
			return err;
		}

		err = xenbus_transaction_end(trans, 0);
		if (err != -EAGAIN)
			break;
	}

	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error completing transaction "
				 "for backend");
		return err;
	}

	xenbus_switch_state(pdev->xdev, XenbusStateInitialised);

	dev_dbg(&pdev->xdev->dev, "publishing successful!\n");

	return err;
}
/*
 * Bring the frontend up once the backend is connected: install it as the
 * active frontend, scan every root bus the backend advertises ("root_num"
 * and "root-N" keys) and switch to XenbusStateConnected.
 *
 * If the backend does not publish "root_num", root 0000:00 is tried. A
 * failure of that fallback scan is now treated like any other scan failure
 * instead of being silently overwritten by the state switch.
 *
 * Returns 0 on success; -EFAULT if the frontend is not in
 * XenbusStateInitialised; otherwise the error of the failing step.
 */
static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
{
	int err = -EFAULT;
	int i, num_roots, len;
	char str[64];
	unsigned int domain, bus;

	/* Only connect once */
	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateInitialised)
		goto out;

	err = pcifront_connect(pdev);
	if (err) {
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error connecting PCI Frontend");
		goto out;
	}

	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
			   "root_num", "%d", &num_roots);
	if (err == -ENOENT) {
		xenbus_dev_error(pdev->xdev, err,
				 "No PCI Roots found, trying 0000:00");
		err = pcifront_scan_root(pdev, 0, 0);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error scanning PCI root 0000:00");
			goto out;
		}
		num_roots = 0;
	} else if (err != 1) {
		if (err == 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of PCI roots");
		goto out;
	}

	for (i = 0; i < num_roots; i++) {
		len = snprintf(str, sizeof(str), "root-%d", i);
		if (unlikely(len >= (sizeof(str) - 1))) {
			err = -ENOMEM;
			goto out;
		}

		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
				   "%x:%x", &domain, &bus);
		if (err != 2) {
			if (err >= 0)
				err = -EINVAL;
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error reading PCI root %d", i);
			goto out;
		}

		err = pcifront_scan_root(pdev, domain, bus);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error scanning PCI root %04x:%02x",
					 domain, bus);
			goto out;
		}
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);

out:
	return err;
}
static int pcifront_try_disconnect(struct pcifront_device *pdev)
{
int err = 0;
enum xenbus_state prev_state;
prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
if (prev_state >= XenbusStateClosing)
goto out;
if (prev_state == XenbusStateConnected) {
pcifront_free_roots(pdev);
pcifront_disconnect(pdev);
}
err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
out:
return err;
}
/*
 * Second half of a reconfiguration: rescan every root bus the backend
 * advertises so that newly exported devices appear, then switch back to
 * XenbusStateConnected.
 *
 * If the backend does not publish "root_num", root 0000:00 is tried; a
 * failure of that fallback is reported instead of being ignored. The
 * result of the final state switch is returned, so a successful call
 * yields 0 rather than the leftover xenbus_scanf() match count.
 *
 * Returns 0 on success; -EFAULT if the frontend is not in
 * XenbusStateReconfiguring; otherwise the error of the failing step.
 */
static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
{
	int err = -EFAULT;
	int i, num_roots, len;
	unsigned int domain, bus;
	char str[64];

	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateReconfiguring)
		goto out;

	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
			   "root_num", "%d", &num_roots);
	if (err == -ENOENT) {
		xenbus_dev_error(pdev->xdev, err,
				 "No PCI Roots found, trying 0000:00");
		err = pcifront_rescan_root(pdev, 0, 0);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error scanning PCI root 0000:00");
			goto out;
		}
		num_roots = 0;
	} else if (err != 1) {
		if (err == 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of PCI roots");
		goto out;
	}

	for (i = 0; i < num_roots; i++) {
		len = snprintf(str, sizeof(str), "root-%d", i);
		if (unlikely(len >= (sizeof(str) - 1))) {
			err = -ENOMEM;
			goto out;
		}

		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
				   "%x:%x", &domain, &bus);
		if (err != 2) {
			if (err >= 0)
				err = -EINVAL;
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error reading PCI root %d", i);
			goto out;
		}

		err = pcifront_rescan_root(pdev, domain, bus);
		if (err) {
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error scanning PCI root %04x:%02x",
					 domain, bus);
			goto out;
		}
	}

	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);

out:
	return err;
}
/*
 * First half of a reconfiguration: remove every device whose backend
 * "state-N" key reads XenbusStateClosing, then switch to
 * XenbusStateReconfiguring.
 *
 * Returns 0 without doing anything if the frontend is not connected, a
 * negative error if a xenstore key cannot be read or formatted, otherwise
 * the result of the state switch.
 */
static int pcifront_detach_devices(struct pcifront_device *pdev)
{
	unsigned int domain, bus, slot, func;
	int idx, num_devs;
	char key[64];
	int err;

	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
	    XenbusStateConnected)
		return 0;

	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
			   &num_devs);
	if (err != 1) {
		if (err >= 0)
			err = -EINVAL;
		xenbus_dev_fatal(pdev->xdev, err,
				 "Error reading number of PCI devices");
		return err;
	}

	/* Find devices being detached and remove them. */
	for (idx = 0; idx < num_devs; idx++) {
		struct pci_bus *pci_bus;
		struct pci_dev *pci_dev;
		int n, state;

		n = snprintf(key, sizeof(key), "state-%d", idx);
		if (unlikely(n >= (sizeof(key) - 1)))
			return -ENOMEM;

		/* An unreadable state is treated as "not closing". */
		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, key, "%d",
				   &state);
		if (err != 1)
			state = XenbusStateUnknown;

		if (state != XenbusStateClosing)
			continue;

		/* Remove device. */
		n = snprintf(key, sizeof(key), "vdev-%d", idx);
		if (unlikely(n >= (sizeof(key) - 1)))
			return -ENOMEM;

		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, key,
				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
		if (err != 4) {
			if (err >= 0)
				err = -EINVAL;
			xenbus_dev_fatal(pdev->xdev, err,
					 "Error reading PCI device %d", idx);
			return err;
		}

		pci_bus = pci_find_bus(domain, bus);
		if (!pci_bus) {
			dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
				domain, bus);
			continue;
		}

		pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
		if (!pci_dev) {
			dev_dbg(&pdev->xdev->dev,
				"Cannot get PCI device %04x:%02x:%02x.%02x\n",
				domain, bus, slot, func);
			continue;
		}

		pci_remove_bus_device(pci_dev);
		pci_dev_put(pci_dev);

		dev_dbg(&pdev->xdev->dev,
			"PCI device %04x:%02x:%02x.%02x removed.\n",
			domain, bus, slot, func);
	}

	return xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
}
/*
 * xenbus otherend_changed callback: drive the frontend according to the
 * backend's new state. Return values of the helpers are not propagated.
 */
static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
						  enum xenbus_state be_state)
{
	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);

	switch (be_state) {
	case XenbusStateConnected:
		pcifront_try_connect(pdev);
		break;

	case XenbusStateReconfiguring:
		pcifront_detach_devices(pdev);
		break;

	case XenbusStateReconfigured:
		pcifront_attach_devices(pdev);
		break;

	case XenbusStateClosing:
		dev_warn(&xdev->dev, "backend going away!\n");
		pcifront_try_disconnect(pdev);
		break;

	/* States that require no action from the frontend. */
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateClosed:
		break;
	}
}
/*
 * xenbus probe: allocate the frontend state for @xdev and publish its
 * connection details. On a publish failure the state is freed again.
 * Returns 0 on success, -ENOMEM if allocation fails, or the publish error.
 */
static int pcifront_xenbus_probe(struct xenbus_device *xdev,
				 const struct xenbus_device_id *id)
{
	struct pcifront_device *pdev = alloc_pdev(xdev);
	int rc;

	if (!pdev) {
		rc = -ENOMEM;
		xenbus_dev_fatal(xdev, rc,
				 "Error allocating pcifront_device struct");
		return rc;
	}

	rc = pcifront_publish_info(pdev);
	if (rc)
		free_pdev(pdev);

	return rc;
}
/* xenbus remove: free the frontend state attached to @xdev, if any. */
static int pcifront_xenbus_remove(struct xenbus_device *xdev)
{
	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);

	if (!pdev)
		return 0;

	free_pdev(pdev);
	return 0;
}
/* xenbus device types handled by this driver; the empty entry ends the table. */
static const struct xenbus_device_id xenpci_ids[] = {
{"pci"},
{""},
};
/* xenbus driver description registered by pcifront_init(). */
static struct xenbus_driver xenbus_pcifront_driver = {
.name = "pcifront",
.owner = THIS_MODULE,
.ids = xenpci_ids,
.probe = pcifront_xenbus_probe,
.remove = pcifront_xenbus_remove,
/* called when the backend's xenbus state changes */
.otherend_changed = pcifront_backend_changed,
};
/*
 * Module init: only meaningful in an unprivileged PV guest. Publishes the
 * MSI callbacks and registers the xenbus frontend driver.
 *
 * If driver registration fails, the MSI callbacks are withdrawn again so
 * that xen_pci_frontend is not left pointing at this module's data after a
 * failed load.
 *
 * Returns 0 on success, -ENODEV outside a PV domU, or the registration
 * error.
 */
static int __init pcifront_init(void)
{
	int err;

	if (!xen_pv_domain() || xen_initial_domain())
		return -ENODEV;

	pci_frontend_registrar(1 /* enable */);

	err = xenbus_register_frontend(&xenbus_pcifront_driver);
	if (err)
		pci_frontend_registrar(0 /* disable */);

	return err;
}
/*
 * Module exit: mirror of pcifront_init() in reverse order. The driver is
 * unregistered first, then the MSI callbacks are withdrawn.
 */
static void __exit pcifront_cleanup(void)
{
xenbus_unregister_driver(&xenbus_pcifront_driver);
pci_frontend_registrar(0 /* disable */);
}
module_init(pcifront_init);
module_exit(pcifront_cleanup);
MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xen:pci");
......@@ -631,6 +631,8 @@ static void xenfb_backend_changed(struct xenbus_device *dev,
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
case XenbusStateUnknown:
case XenbusStateClosed:
break;
......
......@@ -74,6 +74,7 @@ config XEN_PLATFORM_PCI
config SWIOTLB_XEN
def_bool y
depends on SWIOTLB
depends on PCI
select SWIOTLB
endmenu
......@@ -4,6 +4,7 @@ obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
obj-$(CONFIG_BLOCK) += biomerge.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
......@@ -12,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
#include <linux/bio.h>
#include <linux/io.h>
#include <xen/page.h>
bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
const struct bio_vec *vec2)
{
unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
((mfn1 == mfn2) || ((mfn1+1) == mfn2));
}
......@@ -16,7 +16,7 @@
* (typically dom0).
* 2. VIRQs, typically used for timers. These are per-cpu events.
* 3. IPIs.
* 4. Hardware interrupts. Not supported at present.
* 4. PIRQs - Hardware interrupts.
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
......@@ -28,12 +28,16 @@
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
#include <asm/desc.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/io_apic.h>
#include <asm/sync_bitops.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
......@@ -73,7 +77,8 @@ enum xen_irq_type {
* event channel - irq->event channel mapping
* cpu - cpu this event channel is bound to
* index - type-specific information:
* PIRQ - vector, with MSB being "needs EIO"
* PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
* guest, or GSI (real passthrough IRQ) of the device.
* VIRQ - virq number
* IPI - IPI vector
* EVTCHN -
......@@ -88,21 +93,30 @@ struct irq_info
unsigned short virq;
enum ipi_vector ipi;
struct {
unsigned short pirq;
unsigned short gsi;
unsigned short vector;
unsigned char vector;
unsigned char flags;
} pirq;
} u;
};
#define PIRQ_NEEDS_EOI (1 << 0)
#define PIRQ_SHAREABLE (1 << 1)
static struct irq_info irq_info[NR_IRQS];
static struct irq_info *irq_info;
static int *pirq_to_irq;
static int nr_pirqs;
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
[0 ... NR_EVENT_CHANNELS-1] = -1
};
static int *evtchn_to_irq;
struct cpu_evtchn_s {
unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
};
static struct cpu_evtchn_s *cpu_evtchn_mask_p;
static __initdata struct cpu_evtchn_s init_evtchn_mask = {
.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
};
static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
return cpu_evtchn_mask_p[cpu].bits;
......@@ -113,6 +127,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
/* Constructor for packed IRQ information. */
static struct irq_info mk_unbound_info(void)
......@@ -138,11 +153,12 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
.cpu = 0, .u.virq = virq };
}
static struct irq_info mk_pirq_info(unsigned short evtchn,
static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
unsigned short gsi, unsigned short vector)
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
.cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } };
.cpu = 0,
.u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
}
/*
......@@ -184,6 +200,16 @@ static unsigned virq_from_irq(unsigned irq)
return info->u.virq;
}
/*
 * Look up the pirq number stored for @irq.
 *
 * BUGs if there is no irq_info entry for @irq or if the entry is not of
 * type IRQT_PIRQ.
 */
static unsigned pirq_from_irq(unsigned irq)
{
	struct irq_info *entry = info_for_irq(irq);

	BUG_ON(entry == NULL);
	BUG_ON(entry->type != IRQT_PIRQ);

	return entry->u.pirq.pirq;
}
static unsigned gsi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
......@@ -225,6 +251,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
return ret;
}
/*
 * Report whether the PIRQ_NEEDS_EOI flag is set in the irq_info entry of
 * @irq. BUGs if the entry is not of type IRQT_PIRQ.
 */
static bool pirq_needs_eoi(unsigned irq)
{
	struct irq_info *entry = info_for_irq(irq);
	unsigned char pirq_flags;

	BUG_ON(entry->type != IRQT_PIRQ);

	pirq_flags = entry->u.pirq.flags;
	return pirq_flags & PIRQ_NEEDS_EOI;
}
static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
......@@ -336,12 +371,40 @@ static void unmask_evtchn(int port)
put_cpu();
}
/*
 * Number of hardware irqs: get_nr_irqs_gsi() when CONFIG_X86_IO_APIC is
 * defined, 1 otherwise.
 */
static int get_nr_hw_irqs(void)
{
#ifdef CONFIG_X86_IO_APIC
	return get_nr_irqs_gsi();
#else
	return 1;
#endif
}
/*
 * Scan pirq_to_irq[] from the highest index (nr_pirqs - 1) downwards and
 * return the first pirq whose entry is negative, i.e. not bound to an irq.
 * Returns -1 when every entry is in use.
 *
 * Callers should make sure that PHYSDEVOP_get_nr_pirqs succeeded,
 * otherwise nr_pirqs won't hold the right value.
 */
static int find_unbound_pirq(void)
{
	int pirq = nr_pirqs;

	while (--pirq >= 0) {
		if (pirq_to_irq[pirq] < 0)
			return pirq;
	}

	return -1;
}
static int find_unbound_irq(void)
{
struct irq_data *data;
int irq, res;
int start = get_nr_hw_irqs();
for (irq = 0; irq < nr_irqs; irq++) {
if (start == nr_irqs)
goto no_irqs;
/* nr_irqs is a magic value. Must not use it.*/
for (irq = nr_irqs-1; irq > start; irq--) {
data = irq_get_irq_data(irq);
/* only 0->15 have init'd desc; handle irq > 16 */
if (!data)
......@@ -354,8 +417,8 @@ static int find_unbound_irq(void)
return irq;
}
if (irq == nr_irqs)
panic("No available IRQ to bind to: increase nr_irqs!\n");
if (irq == start)
goto no_irqs;
res = irq_alloc_desc_at(irq, 0);
......@@ -363,6 +426,357 @@ static int find_unbound_irq(void)
return -1;
return irq;
no_irqs:
panic("No available IRQ to bind to: increase nr_irqs!\n");
}
/*
 * All hardware irqs are identity mapped: true when @irq is below the
 * count reported by get_nr_hw_irqs().
 */
static bool identity_mapped_irq(unsigned irq)
{
	unsigned nr_hw = get_nr_hw_irqs();

	return irq < nr_hw;
}
/*
 * Issue PHYSDEVOP_eoi for the pirq behind @irq, but only if the irq is
 * flagged as needing an EOI. A non-zero hypercall result triggers WARN_ON.
 *
 * The pirq lookup happens unconditionally, so pirq_from_irq()'s BUG_ON
 * checks apply even when no EOI is sent.
 */
static void pirq_unmask_notify(int irq)
{
	struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
	int rc;

	if (likely(!pirq_needs_eoi(irq)))
		return;

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
	WARN_ON(rc);
}
static void pirq_query_unmask(int irq)
{
struct physdev_irq_status_query irq_status;
struct irq_info *info = info_for_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
irq_status.irq = pirq_from_irq(irq);
if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
irq_status.flags = 0;
info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
if (irq_status.flags & XENIRQSTAT_needs_eoi)
info->u.pirq.flags |= PIRQ_NEEDS_EOI;
}
static bool probing_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
return desc && desc->action == NULL;
}
static unsigned int startup_pirq(unsigned int irq)
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
int evtchn = evtchn_from_irq(irq);
int rc;
BUG_ON(info->type != IRQT_PIRQ);
if (VALID_EVTCHN(evtchn))
goto out;
bind_pirq.pirq = pirq_from_irq(irq);
/* NB. We are happy to share unless we are probing. */
bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
BIND_PIRQ__WILL_SHARE : 0;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
if (rc != 0) {
if (!probing_irq(irq))
printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
irq);
return 0;
}
evtchn = bind_pirq.port;
pirq_query_unmask(irq);
evtchn_to_irq[evtchn] = irq;
bind_evtchn_to_cpu(evtchn, 0);
info->evtchn = evtchn;
out:
unmask_evtchn(evtchn);
pirq_unmask_notify(irq);
return 0;
}
static void shutdown_pirq(unsigned int irq)
{
struct evtchn_close close;
struct irq_info *info = info_for_irq(irq);
int evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
if (!VALID_EVTCHN(evtchn))
return;
mask_evtchn(evtchn);
close.port = evtchn;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
info->evtchn = 0;
}
/*
 * Enable/unmask hook for pirqs: delegates to startup_pirq() and discards
 * its return value.
 */
static void enable_pirq(unsigned int irq)
{
startup_pirq(irq);
}
/* Disable/mask hook for pirqs: intentionally a no-op in this file. */
static void disable_pirq(unsigned int irq)
{
}
/*
 * irq_chip ack hook for pirqs: let a pending affinity move happen via
 * move_native_irq(), then mask and clear the event channel if @irq has a
 * valid one.
 */
static void ack_pirq(unsigned int irq)
{
	int port = evtchn_from_irq(irq);

	move_native_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	mask_evtchn(port);
	clear_evtchn(port);
}
/*
 * irq_chip end hook for pirqs.
 *
 * Warns and returns if @irq has no descriptor. If the descriptor is both
 * IRQ_DISABLED and IRQ_PENDING the pirq is shut down; otherwise, when a
 * valid event channel exists, it is unmasked and an EOI is sent if needed.
 */
static void end_pirq(unsigned int irq)
{
	const unsigned int disabled_pending = IRQ_DISABLED | IRQ_PENDING;
	int port = evtchn_from_irq(irq);
	struct irq_desc *desc = irq_to_desc(irq);

	if (WARN_ON(!desc))
		return;

	if ((desc->status & disabled_pending) == disabled_pending) {
		shutdown_pirq(irq);
		return;
	}

	if (VALID_EVTCHN(port)) {
		unmask_evtchn(port);
		pirq_unmask_notify(irq);
	}
}
/*
 * Linear search over irqs 0..nr_irqs-1 for an IRQT_PIRQ entry whose gsi
 * equals @gsi. Returns the lowest matching irq, or -1 if none matches.
 */
static int find_irq_by_gsi(unsigned gsi)
{
	int irq = 0;

	while (irq < nr_irqs) {
		struct irq_info *entry = info_for_irq(irq);

		if (entry != NULL && entry->type == IRQT_PIRQ &&
		    gsi_from_irq(irq) == gsi)
			return irq;
		irq++;
	}

	return -1;
}
/*
 * Convenience wrapper around xen_map_pirq_gsi() that uses @gsi as both
 * the pirq number and the gsi. Returns whatever xen_map_pirq_gsi()
 * returns.
 */
int xen_allocate_pirq(unsigned gsi, int shareable, char *name)
{
return xen_map_pirq_gsi(gsi, gsi, shareable, name);
}
/* xen_map_pirq_gsi might allocate irqs from the top down, as a
 * consequence don't assume that the irq number returned has a low value
 * or can be used as a pirq number unless you know otherwise.
 *
 * One notable exception is when xen_map_pirq_gsi is called passing an
 * hardware gsi as argument, in that case the irq number returned
 * matches the gsi number passed as second argument.
 *
 * Note: We don't assign an event channel until the irq actually started
 * up.  Return an existing irq if we've already got one for the gsi.
 *
 * @pirq:      Xen pirq number; used as an index into pirq_to_irq[]
 * @gsi:       gsi of the device; may become the irq number itself
 * @shareable: non-zero to mark the pirq PIRQ_SHAREABLE
 * @name:      handler name passed to set_irq_chip_and_handler_name()
 *
 * Returns the irq on success, -1 when an argument is out of range or no
 * irq could be found, and -ENOSPC when the initial domain fails to
 * allocate a vector.
 */
int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
{
	/* Start from an error value so that early exits never look like irq 0. */
	int irq = -1;
	struct physdev_irq irq_op;

	spin_lock(&irq_mapping_update_lock);

	/*
	 * pirq indexes pirq_to_irq[nr_pirqs] and gsi may index
	 * irq_info[nr_irqs], so both must be strictly below their limit.
	 */
	if ((pirq >= nr_pirqs) || (gsi >= nr_irqs)) {
		printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n",
			pirq >= nr_pirqs ? "pirq" : "",
			gsi >= nr_irqs ? "gsi" : "");
		goto out;
	}

	irq = find_irq_by_gsi(gsi);
	if (irq != -1) {
		printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
		       irq, gsi);
		goto out;	/* XXX need refcount? */
	}

	/* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
	 * we are using the !xen_initial_domain() to drop in the function.*/
	if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
				xen_pv_domain())) {
		irq = gsi;
		/* Return value deliberately ignored, as in the original code. */
		irq_alloc_desc_at(irq, 0);
	} else {
		irq = find_unbound_irq();
		/* Never use a failed lookup as an array index below. */
		if (irq < 0)
			goto out;
	}

	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
				      handle_level_irq, name);

	irq_op.irq = irq;
	irq_op.vector = 0;

	/* Only the privileged domain can do this. For non-priv, the pcifront
	 * driver provides a PCI bus that does the call to do exactly
	 * this in the priv domain. */
	if (xen_initial_domain() &&
	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
		irq_free_desc(irq);
		irq = -ENOSPC;
		goto out;
	}

	irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
	irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
	pirq_to_irq[pirq] = irq;

out:
	spin_unlock(&irq_mapping_update_lock);

	return irq;
}
#ifdef CONFIG_PCI_MSI
#include <linux/msi.h>
#include "../pci/msi.h"
/*
 * Allocate an unbound irq and an unbound pirq for use with an MSI and
 * tie them together.
 *
 * @name: handler name passed to set_irq_chip_and_handler_name()
 * @irq:  out: the allocated irq, or -1 on failure
 * @pirq: out: the allocated pirq, or -1 on failure
 *
 * On failure both outputs are -1 and nothing stays allocated: if no pirq
 * is free, the irq descriptor obtained a moment earlier is released again
 * rather than leaked.
 */
void xen_allocate_pirq_msi(char *name, int *irq, int *pirq)
{
	spin_lock(&irq_mapping_update_lock);

	/* Give the caller a defined value even if irq allocation fails. */
	*pirq = -1;

	*irq = find_unbound_irq();
	if (*irq == -1)
		goto out;

	*pirq = find_unbound_pirq();
	if (*pirq == -1) {
		irq_free_desc(*irq);
		*irq = -1;
		goto out;
	}

	set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
				      handle_level_irq, name);

	irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
	pirq_to_irq[*pirq] = *irq;

out:
	spin_unlock(&irq_mapping_update_lock);
}
/*
 * Ask Xen to map an MSI or MSI-X interrupt of @dev to a pirq and bind it
 * to a freshly found irq.
 *
 * @dev:     PCI device owning the interrupt
 * @msidesc: MSI descriptor; only msi_attrib.entry_nr is read, and only
 *           for MSI-X
 * @type:    PCI_CAP_ID_MSIX selects the MSI-X path; any other value is
 *           treated as plain MSI
 *
 * Returns the irq on success, or -1 if no irq is available or the
 * PHYSDEVOP_map_pirq hypercall fails.
 */
int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
{
int irq = -1;
struct physdev_map_pirq map_irq;
int rc;
int pos;
u32 table_offset, bir;
/* Start from an all-zero request; index and pirq are passed in as -1. */
memset(&map_irq, 0, sizeof(map_irq));
map_irq.domid = DOMID_SELF;
map_irq.type = MAP_PIRQ_TYPE_MSI;
map_irq.index = -1;
map_irq.pirq = -1;
map_irq.bus = dev->bus->number;
map_irq.devfn = dev->devfn;
if (type == PCI_CAP_ID_MSIX) {
/* NOTE(review): pos is not checked for 0 (capability not found). */
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
pci_read_config_dword(dev, msix_table_offset_reg(pos),
&table_offset);
/* The BIR bits select which BAR's start address becomes table_base. */
bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
map_irq.table_base = pci_resource_start(dev, bir);
map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
}
spin_lock(&irq_mapping_update_lock);
irq = find_unbound_irq();
if (irq == -1)
goto out;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
if (rc) {
printk(KERN_WARNING "xen map irq failed %d\n", rc);
/* Undo the irq allocation before reporting failure. */
irq_free_desc(irq);
irq = -1;
goto out;
}
/* Record the pirq from the hypercall; gsi is 0 and map_irq.index is stored as the vector. */
irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq,
(type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
#endif
int xen_destroy_irq(int irq)
{
struct irq_desc *desc;
struct physdev_unmap_pirq unmap_irq;
struct irq_info *info = info_for_irq(irq);
int rc = -ENOENT;
spin_lock(&irq_mapping_update_lock);
desc = irq_to_desc(irq);
if (!desc)
goto out;
if (xen_initial_domain()) {
unmap_irq.pirq = info->u.pirq.gsi;
unmap_irq.domid = DOMID_SELF;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
if (rc) {
printk(KERN_WARNING "unmap irq failed %d\n", rc);
goto out;
}
}
irq_info[irq] = mk_unbound_info();
irq_free_desc(irq);
out:
spin_unlock(&irq_mapping_update_lock);
return rc;
}
/* Non-static wrapper around vector_from_irq(); returns its result for @irq. */
int xen_vector_from_irq(unsigned irq)
{
return vector_from_irq(irq);
}
/* Non-static wrapper around gsi_from_irq(); returns its result for @irq. */
int xen_gsi_from_irq(unsigned irq)
{
return gsi_from_irq(irq);
}
int bind_evtchn_to_irq(unsigned int evtchn)
......@@ -425,7 +839,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
}
static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int evtchn, irq;
......@@ -928,7 +1342,7 @@ void xen_clear_irq_pending(int irq)
if (VALID_EVTCHN(evtchn))
clear_evtchn(evtchn);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
......@@ -948,9 +1362,9 @@ bool xen_test_irq_pending(int irq)
return ret;
}
/* Poll waiting for an irq to become pending. In the usual case, the
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq)
/* Poll waiting for an irq to become pending with timeout. In the usual case,
* the irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq_timeout(int irq, u64 timeout)
{
evtchn_port_t evtchn = evtchn_from_irq(irq);
......@@ -958,13 +1372,20 @@ void xen_poll_irq(int irq)
struct sched_poll poll;
poll.nr_ports = 1;
poll.timeout = 0;
poll.timeout = timeout;
set_xen_guest_handle(poll.ports, &evtchn);
if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
BUG();
}
}
EXPORT_SYMBOL(xen_poll_irq_timeout);
/* Poll waiting for an irq to become pending. In the usual case, the
 * irq will be disabled so it won't deliver an interrupt.
 *
 * Thin wrapper: calls xen_poll_irq_timeout() with a timeout argument of
 * 0, which the call site marks as "no timeout". */
void xen_poll_irq(int irq)
{
xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
void xen_irq_resume(void)
{
......@@ -1001,6 +1422,26 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.retrigger = retrigger_dynirq,
};
/*
 * irq_chip used for physical interrupts (pirqs).
 *
 * enable and unmask share enable_pirq(), and disable and mask share
 * disable_pirq() (a no-op). Affinity and retrigger reuse the handlers
 * that the dynamic chip uses.
 */
static struct irq_chip xen_pirq_chip __read_mostly = {
.name = "xen-pirq",
.startup = startup_pirq,
.shutdown = shutdown_pirq,
.enable = enable_pirq,
.unmask = enable_pirq,
.disable = disable_pirq,
.mask = disable_pirq,
.ack = ack_pirq,
.end = end_pirq,
.set_affinity = set_affinity_irq,
.retrigger = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
.name = "xen-percpu",
......@@ -1051,11 +1492,32 @@ void xen_callback_vector(void) {}
void __init xen_init_IRQ(void)
{
int i;
int i, rc;
struct physdev_nr_pirqs op_nr_pirqs;
cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
GFP_KERNEL);
BUG_ON(cpu_evtchn_mask_p == NULL);
irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs);
if (rc < 0) {
nr_pirqs = nr_irqs;
if (rc != -ENOSYS)
printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc);
} else {
if (xen_pv_domain() && !xen_initial_domain())
nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs);
else
nr_pirqs = op_nr_pirqs.nr_pirqs;
}
pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL);
for (i = 0; i < nr_pirqs; i++)
pirq_to_irq[i] = -1;
evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
GFP_KERNEL);
for (i = 0; i < NR_EVENT_CHANNELS; i++)
evtchn_to_irq[i] = -1;
init_evtchn_cpu_bindings();
......@@ -1066,7 +1528,12 @@ void __init xen_init_IRQ(void)
if (xen_hvm_domain()) {
xen_callback_vector();
native_init_IRQ();
/* pci_xen_hvm_init must be called after native_init_IRQ so that
* __acpi_register_gsi can point at the right function */
pci_xen_hvm_init();
} else {
irq_ctx_init(smp_processor_id());
if (xen_initial_domain())
xen_setup_pirqs();
}
}
/*
* Copyright (c) 2009, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Author: Weidong Han <weidong.han@intel.com>
*/
#include <linux/pci.h>
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
/*
 * Report a newly added PCI device to Xen.
 *
 * One of three hypercall forms is chosen:
 *  - virtual function (only with CONFIG_PCI_IOV): the extended call with
 *    is_virtfn set and the bus/devfn of the physical function filled in;
 *  - ARI enabled on the bus and a non-zero slot in devfn: the extended
 *    call with is_extfn set;
 *  - anything else: the plain PHYSDEVOP_manage_pci_add call.
 *
 * Returns the result of the hypercall.
 */
static int xen_add_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
#ifdef CONFIG_PCI_IOV
if (pci_dev->is_virtfn) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
.is_virtfn = 1,
.physfn.bus = pci_dev->physfn->bus->number,
.physfn.devfn = pci_dev->physfn->devfn,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
&manage_pci_ext);
} else
#endif
/* With CONFIG_PCI_IOV this "if" is the else-branch of the test above. */
if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
.is_extfn = 1,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
&manage_pci_ext);
} else {
struct physdev_manage_pci manage_pci = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add,
&manage_pci);
}
return r;
}
static int xen_remove_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
struct physdev_manage_pci manage_pci;
manage_pci.bus = pci_dev->bus->number;
manage_pci.devfn = pci_dev->devfn;
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
&manage_pci);
return r;
}
static int xen_pci_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
int r = 0;
switch (action) {
case BUS_NOTIFY_ADD_DEVICE:
r = xen_add_device(dev);
break;
case BUS_NOTIFY_DEL_DEVICE:
r = xen_remove_device(dev);
break;
default:
break;
}
return r;
}
/*
 * Notifier block hooking xen_pci_notifier() into the PCI bus notifier
 * chain. Only referenced from this file, so it is kept static to avoid
 * exporting such a generic name into the kernel's global namespace.
 */
static struct notifier_block device_nb = {
	.notifier_call = xen_pci_notifier,
};
/*
 * Register the Xen PCI bus notifier, but only when running as the
 * initial domain; in any other domain this is a successful no-op.
 * Returns 0 or the result of bus_register_notifier().
 */
static int __init register_xen_pci_notifier(void)
{
	if (xen_initial_domain())
		return bus_register_notifier(&pci_bus_type, &device_nb);

	return 0;
}

arch_initcall(register_xen_pci_notifier);
......@@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state)
[ XenbusStateConnected ] = "Connected",
[ XenbusStateClosing ] = "Closing",
[ XenbusStateClosed ] = "Closed",
[XenbusStateReconfiguring] = "Reconfiguring",
[XenbusStateReconfigured] = "Reconfigured",
};
return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
}
......
......@@ -803,6 +803,7 @@ device_initcall(xenbus_probe_initcall);
static int __init xenbus_init(void)
{
int err = 0;
unsigned long page = 0;
DPRINTK("");
......@@ -823,7 +824,31 @@ static int __init xenbus_init(void)
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (xen_initial_domain()) {
/* dom0 not yet supported */
struct evtchn_alloc_unbound alloc_unbound;
/* Allocate Xenstore page */
page = get_zeroed_page(GFP_KERNEL);
if (!page)
goto out_error;
xen_store_mfn = xen_start_info->store_mfn =
pfn_to_mfn(virt_to_phys((void *)page) >>
PAGE_SHIFT);
/* Next allocate a local port which xenstored can bind to */
alloc_unbound.dom = DOMID_SELF;
alloc_unbound.remote_dom = 0;
err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
&alloc_unbound);
if (err == -ENOSYS)
goto out_error;
BUG_ON(err);
xen_store_evtchn = xen_start_info->store_evtchn =
alloc_unbound.port;
xen_store_interface = mfn_to_virt(xen_store_mfn);
} else {
if (xen_hvm_domain()) {
uint64_t v = 0;
......@@ -869,6 +894,8 @@ static int __init xenbus_init(void)
bus_unregister(&xenbus_frontend.bus);
out_error:
if (page != 0)
free_page(page);
return err;
}
......
......@@ -12,6 +12,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
void *dev_id);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
......@@ -53,6 +54,10 @@ bool xen_test_irq_pending(int irq);
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq);
/* Poll waiting for an irq to become pending with a timeout. In the usual case,
* the irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq_timeout(int irq, u64 timeout);
/* Determine the IRQ which is bound to an event channel */
unsigned irq_from_evtchn(unsigned int evtchn);
......@@ -63,4 +68,25 @@ int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs);
void xen_hvm_evtchn_do_upcall(void);
/* Allocate an irq for a physical interrupt, given a gsi. "Legacy"
 * GSIs are identity mapped; others are dynamically allocated as
 * usual. */
int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
/* Same as above, but with the pirq number given separately from the gsi. */
int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
#ifdef CONFIG_PCI_MSI
/* Allocate an irq and a pirq to be used with MSIs. */
void xen_allocate_pirq_msi(char *name, int *irq, int *pirq);
/* Map an MSI or MSI-X interrupt of dev to a pirq; returns the irq or -1. */
int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
#endif
/* De-allocates the above mentioned physical interrupt. */
int xen_destroy_irq(int irq);
/* Return the vector recorded for an irq (the argument is an irq number,
 * despite the parameter name). */
int xen_vector_from_irq(unsigned pirq);
/* Return the gsi recorded for an irq (the argument is an irq number,
 * despite the parameter name). */
int xen_gsi_from_irq(unsigned pirq);
#endif /* _XEN_EVENTS_H */
......@@ -47,6 +47,9 @@
/* x86: pvclock algorithm is safe to use on HVM */
#define XENFEAT_hvm_safe_pvclock 9
/* x86: pirq can be used by HVM guests */
#define XENFEAT_hvm_pirqs 10
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
/*
* PCI Backend/Frontend Common Data Structures & Macros
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#ifndef __XEN_PCI_COMMON_H__
#define __XEN_PCI_COMMON_H__
/* Be sure to bump this number if you change this file */
#define XEN_PCI_MAGIC "7"
/* xen_pci_sharedinfo flags.
 * Each _XEN_* macro is a bit position; the macro without the leading
 * underscore is the corresponding single-bit mask. */
#define _XEN_PCIF_active (0)
#define XEN_PCIF_active (1<<_XEN_PCIF_active)
#define _XEN_PCIB_AERHANDLER (1)
#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
#define _XEN_PCIB_active (2)
#define XEN_PCIB_active (1<<_XEN_PCIB_active)
/* xen_pci_op commands */
#define XEN_PCI_OP_conf_read (0)
#define XEN_PCI_OP_conf_write (1)
#define XEN_PCI_OP_enable_msi (2)
#define XEN_PCI_OP_disable_msi (3)
#define XEN_PCI_OP_enable_msix (4)
#define XEN_PCI_OP_disable_msix (5)
#define XEN_PCI_OP_aer_detected (6)
#define XEN_PCI_OP_aer_resume (7)
#define XEN_PCI_OP_aer_mmio (8)
#define XEN_PCI_OP_aer_slotreset (9)
/* xen_pci_op error numbers */
#define XEN_PCI_ERR_success (0)
#define XEN_PCI_ERR_dev_not_found (-1)
#define XEN_PCI_ERR_invalid_offset (-2)
#define XEN_PCI_ERR_access_denied (-3)
#define XEN_PCI_ERR_not_implemented (-4)
/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
#define XEN_PCI_ERR_op_failed (-5)
/*
 * Maximum number of MSI-X entries carried in one struct xen_pci_op.
 * Ideally this would be
 * (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct msix_entry).
 * It should not exceed 128.
 */
#define SH_INFO_MAX_VEC 128
/* One MSI-X table entry as exchanged in xen_pci_op.msix_entries[]. */
struct xen_msix_entry {
uint16_t vector;
uint16_t entry;
};
/* A single PCI operation request/response placed in the shared info page. */
struct xen_pci_op {
/* IN: what action to perform: XEN_PCI_OP_* */
uint32_t cmd;
/* OUT: will contain an error number (if any) from errno.h */
/* NOTE(review): XEN_PCI_ERR_* codes are also defined in this header;
 * confirm which set is actually reported here. */
int32_t err;
/* IN: which device to touch */
uint32_t domain; /* PCI Domain/Segment */
uint32_t bus;
uint32_t devfn;
/* IN: which configuration registers to touch */
int32_t offset;
int32_t size;
/* IN/OUT: Contains the result after a READ or the value to WRITE */
uint32_t value;
/* IN: Contains extra info for this operation */
uint32_t info;
/* IN: parameters for MSI-X; holds at most SH_INFO_MAX_VEC entries */
struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
};
/* Used for PCIe AER (Advanced Error Reporting) handling. */
struct xen_pcie_aer_op {
/* IN: what action to perform: XEN_PCI_OP_* */
uint32_t cmd;
/* IN/OUT: return aer_op result or carry error_detected state as input */
int32_t err;
/* IN: which device to touch */
uint32_t domain; /* PCI Domain/Segment */
uint32_t bus;
uint32_t devfn;
};
/* Layout of the shared info area: a flags word followed by one regular
 * PCI operation slot and one AER operation slot. */
struct xen_pci_sharedinfo {
/* flags - XEN_PCIF_* */
/* NOTE(review): XEN_PCIB_* bits are defined alongside XEN_PCIF_* and
 * presumably live in this word too - confirm. */
uint32_t flags;
struct xen_pci_op op;
struct xen_pcie_aer_op aer_op;
};
#endif /* __XEN_PCI_COMMON_H__ */
......@@ -27,8 +27,14 @@ enum xenbus_state
XenbusStateClosing = 5, /* The device is being closed
due to an error or an unplug
event. */
XenbusStateClosed = 6
XenbusStateClosed = 6,
/*
* Reconfiguring: The device is being reconfigured.
*/
XenbusStateReconfiguring = 7,
XenbusStateReconfigured = 8
};
#endif /* _XEN_PUBLIC_IO_XENBUS_H */
......
......@@ -106,6 +106,57 @@ struct physdev_irq {
uint32_t vector;
};
/* Values for physdev_map_pirq.type. */
#define MAP_PIRQ_TYPE_MSI 0x0
#define MAP_PIRQ_TYPE_GSI 0x1
#define MAP_PIRQ_TYPE_UNKNOWN 0x2
#define PHYSDEVOP_map_pirq 13
/* Argument for PHYSDEVOP_map_pirq. In this struct each IN/OUT marker
 * precedes the field it describes. */
struct physdev_map_pirq {
domid_t domid;
/* IN */
int type;
/* IN */
int index;
/* IN or OUT */
int pirq;
/* IN */
int bus;
/* IN */
int devfn;
/* IN */
int entry_nr;
/* IN */
uint64_t table_base;
};
#define PHYSDEVOP_unmap_pirq 14
/* Argument for PHYSDEVOP_unmap_pirq: the pirq to unmap and the domain
 * it belongs to. */
struct physdev_unmap_pirq {
domid_t domid;
/* IN */
int pirq;
};
#define PHYSDEVOP_manage_pci_add 15
#define PHYSDEVOP_manage_pci_remove 16
/* Argument shared by PHYSDEVOP_manage_pci_add and
 * PHYSDEVOP_manage_pci_remove: identifies a device by bus and devfn. */
struct physdev_manage_pci {
/* IN */
uint8_t bus;
uint8_t devfn;
};
#define PHYSDEVOP_manage_pci_add_ext 20
/* Argument for PHYSDEVOP_manage_pci_add_ext: like physdev_manage_pci,
 * plus flags marking the device as an extended function or a virtual
 * function, and the bus/devfn of the physical function. */
struct physdev_manage_pci_ext {
/* IN */
uint8_t bus;
uint8_t devfn;
unsigned is_extfn;
unsigned is_virtfn;
struct {
uint8_t bus;
uint8_t devfn;
} physfn;
};
/*
* Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
* hypercall since 0x00030202.
......@@ -121,6 +172,22 @@ struct physdev_op {
} u;
};
#define PHYSDEVOP_setup_gsi 21
/* Argument for PHYSDEVOP_setup_gsi. All three fields are inputs.
 * NOTE: unlike the other structs in this header, each IN marker here
 * trails the field it describes. */
struct physdev_setup_gsi {
int gsi;
/* IN */
uint8_t triggering;
/* IN */
uint8_t polarity;
/* IN */
};
#define PHYSDEVOP_get_nr_pirqs 22
/* Result of PHYSDEVOP_get_nr_pirqs: the number of pirqs, filled in by
 * the hypervisor. */
struct physdev_nr_pirqs {
/* OUT */
uint32_t nr_pirqs;
};
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment