Commit c1b1a5f1 authored by David S. Miller

[SPARC64]: NUMA device infrastructure.

Record and propagate NUMA information for devices.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0c49a573
...@@ -396,6 +396,7 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de ...@@ -396,6 +396,7 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de
sd->op = &dev->ofdev; sd->op = &dev->ofdev;
sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu;
sd->stc = dev->bus->ofdev.dev.parent->archdata.stc; sd->stc = dev->bus->ofdev.dev.parent->archdata.stc;
sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node;
dev->ofdev.node = dp; dev->ofdev.node = dp;
dev->ofdev.dev.parent = &dev->bus->ofdev.dev; dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
......
...@@ -173,9 +173,11 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np ...@@ -173,9 +173,11 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np
} }
int iommu_table_init(struct iommu *iommu, int tsbsize, int iommu_table_init(struct iommu *iommu, int tsbsize,
u32 dma_offset, u32 dma_addr_mask) u32 dma_offset, u32 dma_addr_mask,
int numa_node)
{ {
unsigned long i, tsbbase, order, sz, num_tsb_entries; unsigned long i, order, sz, num_tsb_entries;
struct page *page;
num_tsb_entries = tsbsize / sizeof(iopte_t); num_tsb_entries = tsbsize / sizeof(iopte_t);
...@@ -188,11 +190,12 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, ...@@ -188,11 +190,12 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
/* Allocate and initialize the free area map. */ /* Allocate and initialize the free area map. */
sz = num_tsb_entries / 8; sz = num_tsb_entries / 8;
sz = (sz + 7UL) & ~7UL; sz = (sz + 7UL) & ~7UL;
iommu->arena.map = kzalloc(sz, GFP_KERNEL); iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
if (!iommu->arena.map) { if (!iommu->arena.map) {
printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
return -ENOMEM; return -ENOMEM;
} }
memset(iommu->arena.map, 0, sz);
iommu->arena.limit = num_tsb_entries; iommu->arena.limit = num_tsb_entries;
if (tlb_type != hypervisor) if (tlb_type != hypervisor)
...@@ -201,21 +204,23 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, ...@@ -201,21 +204,23 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
/* Allocate and initialize the dummy page which we /* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to. * set inactive IO PTEs to point to.
*/ */
iommu->dummy_page = get_zeroed_page(GFP_KERNEL); page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
if (!iommu->dummy_page) { if (!page) {
printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n"); printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
goto out_free_map; goto out_free_map;
} }
iommu->dummy_page = (unsigned long) page_address(page);
memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
/* Now allocate and setup the IOMMU page table itself. */ /* Now allocate and setup the IOMMU page table itself. */
order = get_order(tsbsize); order = get_order(tsbsize);
tsbbase = __get_free_pages(GFP_KERNEL, order); page = alloc_pages_node(numa_node, GFP_KERNEL, order);
if (!tsbbase) { if (!page) {
printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n"); printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
goto out_free_dummy_page; goto out_free_dummy_page;
} }
iommu->page_table = (iopte_t *)tsbbase; iommu->page_table = (iopte_t *)page_address(page);
for (i = 0; i < num_tsb_entries; i++) for (i = 0; i < num_tsb_entries; i++)
iopte_make_dummy(iommu, &iommu->page_table[i]); iopte_make_dummy(iommu, &iommu->page_table[i]);
...@@ -276,20 +281,24 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx) ...@@ -276,20 +281,24 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
static void *dma_4u_alloc_coherent(struct device *dev, size_t size, static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp) dma_addr_t *dma_addrp, gfp_t gfp)
{ {
unsigned long flags, order, first_page;
struct iommu *iommu; struct iommu *iommu;
struct page *page;
int npages, nid;
iopte_t *iopte; iopte_t *iopte;
unsigned long flags, order, first_page;
void *ret; void *ret;
int npages;
size = IO_PAGE_ALIGN(size); size = IO_PAGE_ALIGN(size);
order = get_order(size); order = get_order(size);
if (order >= 10) if (order >= 10)
return NULL; return NULL;
first_page = __get_free_pages(gfp, order); nid = dev->archdata.numa_node;
if (first_page == 0UL) page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
return NULL; return NULL;
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order); memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
......
...@@ -92,6 +92,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br) ...@@ -92,6 +92,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br)
sd->op = &isa_dev->ofdev; sd->op = &isa_dev->ofdev;
sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu; sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
sd->stc = isa_br->ofdev.dev.parent->archdata.stc; sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
isa_dev->ofdev.node = dp; isa_dev->ofdev.node = dp;
isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/mod_devicetable.h> #include <linux/mod_devicetable.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/irq.h>
#include <linux/of_device.h> #include <linux/of_device.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
...@@ -660,6 +661,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, ...@@ -660,6 +661,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
struct device_node *dp = op->node; struct device_node *dp = op->node;
struct device_node *pp, *ip; struct device_node *pp, *ip;
unsigned int orig_irq = irq; unsigned int orig_irq = irq;
int nid;
if (irq == 0xffffffff) if (irq == 0xffffffff)
return irq; return irq;
...@@ -672,7 +674,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, ...@@ -672,7 +674,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
printk("%s: direct translate %x --> %x\n", printk("%s: direct translate %x --> %x\n",
dp->full_name, orig_irq, irq); dp->full_name, orig_irq, irq);
return irq; goto out;
} }
/* Something more complicated. Walk up to the root, applying /* Something more complicated. Walk up to the root, applying
...@@ -744,6 +746,14 @@ static unsigned int __init build_one_device_irq(struct of_device *op, ...@@ -744,6 +746,14 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
printk("%s: Apply IRQ trans [%s] %x --> %x\n", printk("%s: Apply IRQ trans [%s] %x --> %x\n",
op->node->full_name, ip->full_name, orig_irq, irq); op->node->full_name, ip->full_name, orig_irq, irq);
out:
nid = of_node_to_nid(dp);
if (nid != -1) {
cpumask_t numa_mask = node_to_cpumask(nid);
irq_set_affinity(irq, numa_mask);
}
return irq; return irq;
} }
......
...@@ -369,10 +369,12 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, ...@@ -369,10 +369,12 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
sd->host_controller = pbm; sd->host_controller = pbm;
sd->prom_node = node; sd->prom_node = node;
sd->op = of_find_device_by_node(node); sd->op = of_find_device_by_node(node);
sd->numa_node = pbm->numa_node;
sd = &sd->op->dev.archdata; sd = &sd->op->dev.archdata;
sd->iommu = pbm->iommu; sd->iommu = pbm->iommu;
sd->stc = &pbm->stc; sd->stc = &pbm->stc;
sd->numa_node = pbm->numa_node;
type = of_get_property(node, "device_type", NULL); type = of_get_property(node, "device_type", NULL);
if (type == NULL) if (type == NULL)
...@@ -1159,6 +1161,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, ...@@ -1159,6 +1161,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
return 0; return 0;
} }
#ifdef CONFIG_NUMA
int pcibus_to_node(struct pci_bus *pbus)
{
struct pci_pbm_info *pbm = pbus->sysdata;
return pbm->numa_node;
}
EXPORT_SYMBOL(pcibus_to_node);
#endif
/* Return the domain nuber for this pci bus */ /* Return the domain nuber for this pci bus */
int pci_domain_nr(struct pci_bus *pbus) int pci_domain_nr(struct pci_bus *pbus)
......
...@@ -71,7 +71,8 @@ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm) ...@@ -71,7 +71,8 @@ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
*/ */
fire_write(iommu->iommu_flushinv, ~(u64)0); fire_write(iommu->iommu_flushinv, ~(u64)0);
err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
pbm->numa_node);
if (err) if (err)
return err; return err;
...@@ -449,6 +450,8 @@ static int __init pci_fire_pbm_init(struct pci_controller_info *p, ...@@ -449,6 +450,8 @@ static int __init pci_fire_pbm_init(struct pci_controller_info *p,
pbm->next = pci_pbm_root; pbm->next = pci_pbm_root;
pci_pbm_root = pbm; pci_pbm_root = pbm;
pbm->numa_node = -1;
pbm->scan_bus = pci_fire_scan_bus; pbm->scan_bus = pci_fire_scan_bus;
pbm->pci_ops = &sun4u_pci_ops; pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 12; pbm->config_space_reg_bits = 12;
......
...@@ -148,6 +148,8 @@ struct pci_pbm_info { ...@@ -148,6 +148,8 @@ struct pci_pbm_info {
struct pci_bus *pci_bus; struct pci_bus *pci_bus;
void (*scan_bus)(struct pci_pbm_info *); void (*scan_bus)(struct pci_pbm_info *);
struct pci_ops *pci_ops; struct pci_ops *pci_ops;
int numa_node;
}; };
struct pci_controller_info { struct pci_controller_info {
......
...@@ -279,11 +279,17 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, ...@@ -279,11 +279,17 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
unsigned long devino) unsigned long devino)
{ {
int irq = ops->msiq_build_irq(pbm, msiqid, devino); int irq = ops->msiq_build_irq(pbm, msiqid, devino);
int err; int err, nid;
if (irq < 0) if (irq < 0)
return irq; return irq;
nid = pbm->numa_node;
if (nid != -1) {
cpumask_t numa_mask = node_to_cpumask(nid);
irq_set_affinity(irq, numa_mask);
}
err = request_irq(irq, sparc64_msiq_interrupt, 0, err = request_irq(irq, sparc64_msiq_interrupt, 0,
"MSIQ", "MSIQ",
&pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
......
...@@ -848,7 +848,8 @@ static int psycho_iommu_init(struct pci_pbm_info *pbm) ...@@ -848,7 +848,8 @@ static int psycho_iommu_init(struct pci_pbm_info *pbm)
/* Leave diag mode enabled for full-flushing done /* Leave diag mode enabled for full-flushing done
* in pci_iommu.c * in pci_iommu.c
*/ */
err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff,
pbm->numa_node);
if (err) if (err)
return err; return err;
...@@ -979,6 +980,8 @@ static void __init psycho_pbm_init(struct pci_controller_info *p, ...@@ -979,6 +980,8 @@ static void __init psycho_pbm_init(struct pci_controller_info *p,
pbm->next = pci_pbm_root; pbm->next = pci_pbm_root;
pci_pbm_root = pbm; pci_pbm_root = pbm;
pbm->numa_node = -1;
pbm->scan_bus = psycho_scan_bus; pbm->scan_bus = psycho_scan_bus;
pbm->pci_ops = &sun4u_pci_ops; pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8; pbm->config_space_reg_bits = 8;
......
...@@ -704,7 +704,7 @@ static int sabre_iommu_init(struct pci_pbm_info *pbm, ...@@ -704,7 +704,7 @@ static int sabre_iommu_init(struct pci_pbm_info *pbm,
* in pci_iommu.c * in pci_iommu.c
*/ */
err = iommu_table_init(iommu, tsbsize * 1024 * 8, err = iommu_table_init(iommu, tsbsize * 1024 * 8,
dvma_offset, dma_mask); dvma_offset, dma_mask, pbm->numa_node);
if (err) if (err)
return err; return err;
...@@ -737,6 +737,8 @@ static void __init sabre_pbm_init(struct pci_controller_info *p, ...@@ -737,6 +737,8 @@ static void __init sabre_pbm_init(struct pci_controller_info *p,
pbm->name = dp->full_name; pbm->name = dp->full_name;
printk("%s: SABRE PCI Bus Module\n", pbm->name); printk("%s: SABRE PCI Bus Module\n", pbm->name);
pbm->numa_node = -1;
pbm->scan_bus = sabre_scan_bus; pbm->scan_bus = sabre_scan_bus;
pbm->pci_ops = &sun4u_pci_ops; pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8; pbm->config_space_reg_bits = 8;
......
...@@ -1220,7 +1220,8 @@ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm) ...@@ -1220,7 +1220,8 @@ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
/* Leave diag mode enabled for full-flushing done /* Leave diag mode enabled for full-flushing done
* in pci_iommu.c * in pci_iommu.c
*/ */
err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
pbm->numa_node);
if (err) if (err)
return err; return err;
...@@ -1379,6 +1380,8 @@ static int __init schizo_pbm_init(struct pci_controller_info *p, ...@@ -1379,6 +1380,8 @@ static int __init schizo_pbm_init(struct pci_controller_info *p,
pbm->next = pci_pbm_root; pbm->next = pci_pbm_root;
pci_pbm_root = pbm; pci_pbm_root = pbm;
pbm->numa_node = -1;
pbm->scan_bus = schizo_scan_bus; pbm->scan_bus = schizo_scan_bus;
pbm->pci_ops = &sun4u_pci_ops; pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8; pbm->config_space_reg_bits = 8;
......
...@@ -127,10 +127,12 @@ static inline long iommu_batch_end(void) ...@@ -127,10 +127,12 @@ static inline long iommu_batch_end(void)
static void *dma_4v_alloc_coherent(struct device *dev, size_t size, static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp) dma_addr_t *dma_addrp, gfp_t gfp)
{ {
struct iommu *iommu;
unsigned long flags, order, first_page, npages, n; unsigned long flags, order, first_page, npages, n;
struct iommu *iommu;
struct page *page;
void *ret; void *ret;
long entry; long entry;
int nid;
size = IO_PAGE_ALIGN(size); size = IO_PAGE_ALIGN(size);
order = get_order(size); order = get_order(size);
...@@ -139,10 +141,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, ...@@ -139,10 +141,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
npages = size >> IO_PAGE_SHIFT; npages = size >> IO_PAGE_SHIFT;
first_page = __get_free_pages(gfp, order); nid = dev->archdata.numa_node;
if (unlikely(first_page == 0UL)) page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
return NULL; return NULL;
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order); memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu; iommu = dev->archdata.iommu;
...@@ -899,6 +903,8 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, ...@@ -899,6 +903,8 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
pbm->next = pci_pbm_root; pbm->next = pci_pbm_root;
pci_pbm_root = pbm; pci_pbm_root = pbm;
pbm->numa_node = of_node_to_nid(dp);
pbm->scan_bus = pci_sun4v_scan_bus; pbm->scan_bus = pci_sun4v_scan_bus;
pbm->pci_ops = &sun4v_pci_ops; pbm->pci_ops = &sun4v_pci_ops;
pbm->config_space_reg_bits = 12; pbm->config_space_reg_bits = 12;
...@@ -913,6 +919,7 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, ...@@ -913,6 +919,7 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
pbm->name = dp->full_name; pbm->name = dp->full_name;
printk("%s: SUN4V PCI Bus Module\n", pbm->name); printk("%s: SUN4V PCI Bus Module\n", pbm->name);
printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
pci_determine_mem_io_space(pbm); pci_determine_mem_io_space(pbm);
......
...@@ -544,6 +544,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) ...@@ -544,6 +544,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
sbus->ofdev.dev.archdata.iommu = iommu; sbus->ofdev.dev.archdata.iommu = iommu;
sbus->ofdev.dev.archdata.stc = strbuf; sbus->ofdev.dev.archdata.stc = strbuf;
sbus->ofdev.dev.archdata.numa_node = -1;
reg_base = regs + SYSIO_IOMMUREG_BASE; reg_base = regs + SYSIO_IOMMUREG_BASE;
iommu->iommu_control = reg_base + IOMMU_CONTROL; iommu->iommu_control = reg_base + IOMMU_CONTROL;
...@@ -575,7 +576,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) ...@@ -575,7 +576,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
sbus->portid, regs); sbus->portid, regs);
/* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff)) if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
goto fatal_memory_error; goto fatal_memory_error;
control = upa_readq(iommu->iommu_control); control = upa_readq(iommu->iommu_control);
......
...@@ -16,6 +16,8 @@ struct dev_archdata { ...@@ -16,6 +16,8 @@ struct dev_archdata {
struct device_node *prom_node; struct device_node *prom_node;
struct of_device *op; struct of_device *op;
int numa_node;
}; };
#endif /* _ASM_SPARC_DEVICE_H */ #endif /* _ASM_SPARC_DEVICE_H */
...@@ -77,6 +77,11 @@ extern int of_getintprop_default(struct device_node *np, ...@@ -77,6 +77,11 @@ extern int of_getintprop_default(struct device_node *np,
const char *name, const char *name,
int def); int def);
extern int of_find_in_proplist(const char *list, const char *match, int len); extern int of_find_in_proplist(const char *list, const char *match, int len);
#ifdef CONFIG_NUMA
extern int of_node_to_nid(struct device_node *dp);
#else
#define of_node_to_nid(dp) (-1)
#endif
extern void prom_build_devicetree(void); extern void prom_build_devicetree(void);
......
...@@ -56,6 +56,7 @@ struct strbuf { ...@@ -56,6 +56,7 @@ struct strbuf {
}; };
extern int iommu_table_init(struct iommu *iommu, int tsbsize, extern int iommu_table_init(struct iommu *iommu, int tsbsize,
u32 dma_offset, u32 dma_addr_mask); u32 dma_offset, u32 dma_addr_mask,
int numa_node);
#endif /* !(_SPARC64_IOMMU_H) */ #endif /* !(_SPARC64_IOMMU_H) */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment