[PATCH] ppc64: iommu rewrite

Lots of things renamed, sillycaps killed, stuff moved around and common
code properly extracted from implementation-specific code, new
allocator, etc.  The code is overall a lot simpler, faster, less prone
to failure, and a lot more manageable.  I didn't use "bk mv"; there is no
need to keep the old history attached to the new file.
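
For orientation, the resulting split looks roughly like this (a sketch distilled from the new files below, not an additional API; the example_* names are illustrative only):

	/* Platform side (cf. pSeries_iommu.c / iSeries_iommu.c below): describe
	 * the hardware TCE table and register the low-level entry hooks. */
	static struct iommu_table example_tbl;	/* it_size, it_offset, ... from firmware */

	void example_platform_init(void)
	{
		ppc_md.tce_build = tce_build_pSeries;	/* writes hardware TCE entries */
		ppc_md.tce_free  = tce_free_pSeries;	/* clears hardware TCE entries */
		iommu_init_table(&example_tbl);		/* common code builds the allocation bitmap */
	}

	/* Common side (pci_iommu.c below): the generic allocator hands out bus
	 * addresses and drives the registered hooks. */
	void example_map_one_page(void *vaddr, int direction)
	{
		dma_addr_t dma = iommu_alloc(&example_tbl, vaddr, 1, direction, NULL);

		/* ... device DMA through 'dma' ... */

		iommu_free(&example_tbl, dma, 1);
	}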
@@ -109,6 +109,18 @@ config POWER4_ONLY
 	  binary will not work on POWER3 or RS64 processors when compiled with
 	  binutils 2.15 or later.
 
+config IOMMU_VMERGE
+	bool "Enable IOMMU virtual merging (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	default n
+	help
+	  Cause IO segments sent to a device for DMA to be merged virtually
+	  by the IOMMU when they happen to have been allocated contiguously.
+	  This doesn't add pressure to the IOMMU allocator. However, some
+	  drivers don't support getting large merged segments coming back
+	  from *_map_sg(). Say Y if you know the drivers you are using are
+	  properly handling this case.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
...
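
The constraint mentioned in the help text is the usual one from DMA-mapping.txt: a driver must walk only the number of entries pci_map_sg() returns (which can be smaller than what it passed in once segments are merged) and use sg_dma_address()/sg_dma_len() on each. A rough illustration, where program_hw_sg_entry() stands in for whatever the driver does with each segment:

	static void example_setup_dma(struct pci_dev *pdev,
				      struct scatterlist *sglist, int nents)
	{
		int i, count;

		count = pci_map_sg(pdev, sglist, nents, PCI_DMA_FROMDEVICE);
		for (i = 0; i < count; i++) {
			/* With merging enabled a single entry may cover several
			 * of the original buffers, so the length can span pages. */
			program_hw_sg_entry(pdev, sg_dma_address(&sglist[i]),
					    sg_dma_len(&sglist[i]));
		}
	}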
@@ -10,11 +10,12 @@ obj-y := setup.o entry.o traps.o irq.o idle.o \
 			align.o semaphore.o bitops.o stab.o pacaData.o \
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o pmc.o rtc.o init_task.o \
-			lmb.o cputable.o cpu_setup_power4.o idle_power4.o
+			lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
+			iommu.o
 
 obj-$(CONFIG_PPC_OF) += of_device.o
 
-obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o
+obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_iommu.o
 
 ifdef CONFIG_PPC_ISERIES
 obj-$(CONFIG_PCI) += iSeries_pci.o iSeries_pci_reset.o \
@@ -28,12 +29,12 @@ obj-$(CONFIG_PPC_ISERIES) += iSeries_irq.o \
 			     HvCall.o HvLpConfig.o LparData.o mf_proc.o \
 			     iSeries_setup.o ItLpQueue.o hvCall.o \
 			     mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \
-			     proc_pmc.o
+			     proc_pmc.o iSeries_iommu.o
 
 obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
 			     eeh.o nvram.o pSeries_nvram.o rtasd.o ras.o \
 			     open_pic.o xics.o pSeries_htab.o rtas.o \
-			     chrp_setup.o i8259.o prom.o vio.o
+			     chrp_setup.o i8259.o prom.o vio.o pSeries_iommu.o
 
 obj-$(CONFIG_PROC_FS) += proc_ppc64.o
 obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
...
@@ -51,7 +51,7 @@
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/pci-bridge.h>
-#include <asm/pci_dma.h>
+#include <asm/iommu.h>
 #include <asm/dma.h>
 #include <asm/machdep.h>
 #include <asm/irq.h>
...
/*
* arch/ppc64/kernel/iSeries_iommu.c
*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
* Rewrite, cleanup:
*
* Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
*
* Dynamic DMA mapping support, iSeries-specific parts.
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/ppcdebug.h>
#include <asm/iSeries/HvCallXm.h>
#include <asm/iSeries/LparData.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/iSeries/iSeries_pci.h>
#include <asm/machdep.h>
#include "pci.h"
static struct iommu_table veth_iommu_table; /* Tce table for virtual ethernet */
static struct iommu_table vio_iommu_table; /* Tce table for virtual I/O */
static struct iSeries_Device_Node veth_dev_node = { .LogicalSlot = 0xFF, .iommu_table = &veth_iommu_table };
static struct iSeries_Device_Node vio_dev_node = { .LogicalSlot = 0xFF, .iommu_table = &vio_iommu_table };
static struct pci_dev _veth_dev = { .sysdata = &veth_dev_node };
static struct pci_dev _vio_dev = { .sysdata = &vio_dev_node, .dev.bus = &pci_bus_type };
struct pci_dev *iSeries_veth_dev = &_veth_dev;
struct device *iSeries_vio_dev = &_vio_dev.dev;
extern struct list_head iSeries_Global_Device_List;
static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, int direction)
{
u64 rc;
union tce_entry tce;
while (npages--) {
tce.te_word = 0;
tce.te_bits.tb_rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
tce.te_bits.tb_valid = 1;
tce.te_bits.tb_allio = 1;
if (direction != PCI_DMA_TODEVICE)
tce.te_bits.tb_rdwr = 1;
} else {
/* PCI Bus */
tce.te_bits.tb_rdwr = 1; /* Read allowed */
if (direction != PCI_DMA_TODEVICE)
tce.te_bits.tb_pciwr = 1;
}
rc = HvCallXm_setTce((u64)tbl->it_index,
(u64)index,
tce.te_word);
if (rc)
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", rc);
index++;
uaddr += PAGE_SIZE;
}
}
static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
{
u64 rc;
union tce_entry tce;
while (npages--) {
tce.te_word = 0;
rc = HvCallXm_setTce((u64)tbl->it_index,
(u64)index,
tce.te_word);
if (rc)
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", rc);
index++;
}
}
void __init iommu_vio_init(void)
{
struct iommu_table *t;
struct iommu_table_cb cb;
unsigned long cbp;
cb.itc_busno = 255; /* Bus 255 is the virtual bus */
cb.itc_virtbus = 0xff; /* Ask for virtual bus */
cbp = virt_to_absolute((unsigned long)&cb);
HvCallXm_getTceTableParms(cbp);
veth_iommu_table.it_size = cb.itc_size / 2;
veth_iommu_table.it_busno = cb.itc_busno;
veth_iommu_table.it_offset = cb.itc_offset;
veth_iommu_table.it_index = cb.itc_index;
veth_iommu_table.it_type = TCE_VB;
veth_iommu_table.it_entrysize = sizeof(union tce_entry);
veth_iommu_table.it_blocksize = 1;
t = iommu_init_table(&veth_iommu_table);
if (!t)
printk("Virtual Bus VETH TCE table failed.\n");
vio_iommu_table.it_size = cb.itc_size - veth_iommu_table.it_size;
vio_iommu_table.it_busno = cb.itc_busno;
vio_iommu_table.it_offset = cb.itc_offset +
veth_iommu_table.it_size * (PAGE_SIZE/sizeof(union tce_entry));
vio_iommu_table.it_index = cb.itc_index;
vio_iommu_table.it_type = TCE_VB;
vio_iommu_table.it_entrysize = sizeof(union tce_entry);
vio_iommu_table.it_blocksize = 1;
t = iommu_init_table(&vio_iommu_table);
if (!t)
printk("Virtual Bus VIO TCE table failed.\n");
}
/*
* This function compares the known tables to find an iommu_table
* that has already been built for hardware TCEs.
*/
static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
{
struct iSeries_Device_Node *dp;
for (dp = (struct iSeries_Device_Node *)iSeries_Global_Device_List.next;
dp != (struct iSeries_Device_Node *)&iSeries_Global_Device_List;
dp = (struct iSeries_Device_Node *)dp->Device_List.next)
if (dp->iommu_table != NULL &&
dp->iommu_table->it_type == TCE_PCI &&
dp->iommu_table->it_offset == tbl->it_offset &&
dp->iommu_table->it_index == tbl->it_index &&
dp->iommu_table->it_size == tbl->it_size)
return dp->iommu_table;
return NULL;
}
/*
 * Call Hv with the architected data structure to get TCE table info.
 * Put the returned data into the Linux representation of the
 * TCE table data.
* The Hardware Tce table comes in three flavors.
* 1. TCE table shared between Buses.
* 2. TCE table per Bus.
* 3. TCE Table per IOA.
*/
static void iommu_table_getparms(struct iSeries_Device_Node* dn,
struct iommu_table* tbl)
{
struct iommu_table_cb *parms;
parms = (struct iommu_table_cb*)kmalloc(sizeof(*parms), GFP_KERNEL);
if (parms == NULL)
panic("PCI_DMA: TCE Table Allocation failed.");
memset(parms, 0, sizeof(*parms));
parms->itc_busno = ISERIES_BUS(dn);
parms->itc_slotno = dn->LogicalSlot;
parms->itc_virtbus = 0;
HvCallXm_getTceTableParms(REALADDR(parms));
if (parms->itc_size == 0)
panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
tbl->it_size = parms->itc_size;
tbl->it_busno = parms->itc_busno;
tbl->it_offset = parms->itc_offset;
tbl->it_index = parms->itc_index;
tbl->it_entrysize = sizeof(union tce_entry);
tbl->it_blocksize = 1;
tbl->it_type = TCE_PCI;
kfree(parms);
}
void iommu_devnode_init(struct iSeries_Device_Node *dn) {
struct iommu_table *tbl;
tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
iommu_table_getparms(dn, tbl);
/* Look for existing tce table */
dn->iommu_table = iommu_table_find(tbl);
if (dn->iommu_table == NULL)
dn->iommu_table = iommu_init_table(tbl);
else
kfree(tbl);
return;
}
void tce_init_iSeries(void)
{
ppc_md.tce_build = tce_build_iSeries;
ppc_md.tce_free = tce_free_iSeries;
pci_iommu_init();
}
@@ -36,7 +36,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppcdebug.h>
 #include <asm/naca.h>
-#include <asm/pci_dma.h>
+#include <asm/iommu.h>
 #include <asm/iSeries/HvCallPci.h>
 #include <asm/iSeries/HvCallSm.h>
@@ -53,7 +53,7 @@ extern int panic_timeout;
 extern unsigned long iSeries_Base_Io_Memory;
-extern struct TceTable *tceTables[256];
+extern struct iommu_table *tceTables[256];
 extern void iSeries_MmIoTest(void);
@@ -273,7 +273,7 @@ void __init iSeries_pci_final_fixup(void)
 			iSeries_Device_Information(pdev, Buffer,
 						   sizeof(Buffer));
 			printk("%d. %s\n", DeviceCount, Buffer);
-			create_pci_bus_tce_table((unsigned long)node);
+			iommu_devnode_init(node);
 		} else
 			printk("PCI: Device Tree not found for 0x%016lX\n",
 			       (unsigned long)pdev);
...
/*
* arch/ppc64/kernel/pci_iommu.c
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
* Rewrite, cleanup, new allocation schemes, virtual merging:
* Copyright (C) 2004 Olof Johansson, IBM Corporation
* and Ben. Herrenschmidt, IBM Corporation
*
* Dynamic DMA mapping support, platform-independent parts.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/bitops.h>
#define DBG(...)
#ifdef CONFIG_IOMMU_VMERGE
static int novmerge = 0;
#else
static int novmerge = 1;
#endif
static int __init setup_iommu(char *str)
{
if (!strcmp(str, "novmerge"))
novmerge = 1;
else if (!strcmp(str, "vmerge"))
novmerge = 0;
return 1;
}
__setup("iommu=", setup_iommu);
static unsigned long iommu_range_alloc(struct iommu_table *tbl, unsigned long npages,
unsigned long *handle)
{
unsigned long n, end, i, start;
unsigned long hint;
unsigned long limit;
int largealloc = npages > 15;
if (handle && *handle)
hint = *handle;
else
hint = largealloc ? tbl->it_largehint : tbl->it_hint;
/* Most of this is stolen from x86_64's bit string search function */
start = hint;
/* Use only half of the table for small allocs (less than 15 pages). */
limit = largealloc ? tbl->it_mapsize : tbl->it_mapsize >> 1;
if (largealloc && start < (tbl->it_mapsize >> 1))
start = tbl->it_mapsize >> 1;
again:
n = find_next_zero_bit(tbl->it_map, limit, start);
end = n + npages;
if (end >= limit) {
if (hint) {
start = largealloc ? tbl->it_mapsize >> 1 : 0;
hint = 0;
goto again;
} else
return NO_TCE;
}
for (i = n; i < end; i++)
if (test_bit(i, tbl->it_map)) {
start = i+1;
goto again;
}
for (i = n; i < end; i++)
__set_bit(i, tbl->it_map);
/* Bump the hint to a new PHB cache line, which
* is 16 entries wide on all pSeries machines.
*/
if (largealloc)
tbl->it_largehint = (end+tbl->it_blocksize-1) &
~(tbl->it_blocksize-1);
else
tbl->it_hint = (end+tbl->it_blocksize-1) &
~(tbl->it_blocksize-1);
if (handle)
*handle = end;
return n;
}
dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
unsigned int npages, int direction,
unsigned long *handle)
{
unsigned long entry, flags;
dma_addr_t retTce = NO_TCE;
spin_lock_irqsave(&(tbl->it_lock), flags);
/* Allocate a range of entries into the table */
entry = iommu_range_alloc(tbl, npages, handle);
if (unlikely(entry == NO_TCE)) {
spin_unlock_irqrestore(&(tbl->it_lock), flags);
return NO_TCE;
}
/* We got the tces we wanted */
entry += tbl->it_offset; /* Offset into real TCE table */
retTce = entry << PAGE_SHIFT; /* Set the return dma address */
/* Put the TCEs in the HW table */
ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, direction);
/* Flush/invalidate TLBs if necessary */
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
spin_unlock_irqrestore(&(tbl->it_lock), flags);
return retTce;
}
static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long entry, free_entry;
unsigned long i;
entry = dma_addr >> PAGE_SHIFT;
free_entry = entry - tbl->it_offset;
if (((free_entry + npages) > tbl->it_mapsize) ||
(entry < tbl->it_offset)) {
if (printk_ratelimit()) {
printk(KERN_INFO "iommu_free: invalid entry\n");
printk(KERN_INFO "\tentry = 0x%lx\n", entry);
printk(KERN_INFO "\tdma_ddr = 0x%lx\n", (u64)dma_addr);
printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
printk(KERN_INFO "\tmapsize = 0x%lx\n", (u64)tbl->it_mapsize);
printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
WARN_ON(1);
}
return;
}
ppc_md.tce_free(tbl, entry, npages);
for (i = 0; i < npages; i++)
__clear_bit(free_entry+i, tbl->it_map);
}
void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long flags;
spin_lock_irqsave(&(tbl->it_lock), flags);
__iommu_free(tbl, dma_addr, npages);
/* Flush/invalidate TLBs if necessary */
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
/*
 * Build an iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
struct iommu_table *iommu_init_table(struct iommu_table *tbl)
{
unsigned long sz;
static int welcomed = 0;
/* it_size is in pages, it_mapsize in number of entries */
tbl->it_mapsize = tbl->it_size * tbl->it_entrysize;
if (systemcfg->platform == PLATFORM_POWERMAC)
tbl->it_mapsize = tbl->it_size * (PAGE_SIZE / sizeof(unsigned int));
else
tbl->it_mapsize = tbl->it_size * (PAGE_SIZE / sizeof(union tce_entry));
/* sz is the number of bytes needed for the bitmap */
sz = (tbl->it_mapsize + 7) >> 3;
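	/* Illustrative sizing: a 2GB DMA window covers 1 << 19 4K pages, so
	 * it_mapsize is 1 << 19 entries and the bitmap needs 64KB. */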
tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
if (!tbl->it_map)
panic("iommu_init_table: Can't allocate memory, size %ld bytes\n", sz);
memset(tbl->it_map, 0, sz);
tbl->it_hint = 0;
tbl->it_largehint = 0;
spin_lock_init(&tbl->it_lock);
if (!welcomed) {
printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
novmerge ? "disabled" : "enabled");
welcomed = 1;
}
return tbl;
}
int iommu_alloc_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
int direction, unsigned long *handle)
{
dma_addr_t dma_next, dma_addr;
unsigned long flags, vaddr, npages, entry;
struct scatterlist *s, *outs, *segstart, *ps;
int outcount;
/* Initialize some stuffs */
outs = s = segstart = &sglist[0];
outcount = 1;
ps = NULL;
/* Init first segment length for error handling */
outs->dma_length = 0;
DBG("mapping %d elements:\n", nelems);
spin_lock_irqsave(&(tbl->it_lock), flags);
for (s = outs; nelems; nelems--, s++) {
/* Allocate iommu entries for that segment */
vaddr = (unsigned long)page_address(s->page) + s->offset;
npages = PAGE_ALIGN(vaddr + s->length) - (vaddr & PAGE_MASK);
npages >>= PAGE_SHIFT;
entry = iommu_range_alloc(tbl, npages, handle);
DBG(" - vaddr: %lx, size: %lx\n", vaddr, s->length);
/* Handle failure */
if (unlikely(entry == NO_TCE)) {
if (printk_ratelimit())
printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
" npages %lx\n", tbl, vaddr, npages);
goto failure;
}
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
dma_addr = entry << PAGE_SHIFT;
dma_addr |= s->offset;
DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
/* Insert into HW table */
ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
/* If we are in an open segment, try merging */
if (segstart != s) {
DBG(" - trying merge...\n");
/* We cannot merge if:
* - allocated dma_addr isn't contiguous to previous allocation
* - current entry has an offset into the page
* - previous entry didn't end on a page boundary
*/
if (novmerge || (dma_addr != dma_next) || s->offset ||
(ps->offset + ps->length) % PAGE_SIZE) {
/* Can't merge: create a new segment */
segstart = s;
outcount++; outs++;
DBG(" can't merge, new segment.\n");
} else {
outs->dma_length += s->length;
DBG(" merged, new len: %lx\n", outs->dma_length);
}
}
/* If we are beginning a new segment, fill entries */
if (segstart == s) {
DBG(" - filling new segment.\n");
outs->dma_address = dma_addr;
outs->dma_length = s->length;
}
/* Calculate next page pointer for contiguous check */
dma_next = (dma_addr & PAGE_MASK) + (npages << PAGE_SHIFT);
DBG(" - dma next is: %lx\n", dma_next);
/* Keep a pointer to the previous entry */
ps = s;
}
/* Make sure the update is visible to hardware. */
mb();
/* Flush/invalidate TLBs if necessary */
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
spin_unlock_irqrestore(&(tbl->it_lock), flags);
DBG("mapped %d elements:\n", outcount);
/* For the sake of iommu_free_sg, we clear out the length in the
* next entry of the sglist if we didn't fill the list completely
*/
if (outcount < nelems) {
outs++;
outs->dma_address = NO_TCE;
outs->dma_length = 0;
}
return outcount;
failure:
spin_unlock_irqrestore(&(tbl->it_lock), flags);
for (s = &sglist[0]; s <= outs; s++) {
if (s->dma_length != 0) {
vaddr = s->dma_address & PAGE_MASK;
npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
>> PAGE_SHIFT;
iommu_free(tbl, vaddr, npages);
}
}
return 0;
}
void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems,
int direction)
{
unsigned long flags;
/* Lock the whole operation to try to free as a "chunk" */
spin_lock_irqsave(&(tbl->it_lock), flags);
while (nelems--) {
unsigned int npages;
dma_addr_t dma_handle = sglist->dma_address;
if (sglist->dma_length == 0)
break;
npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
- (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
__iommu_free(tbl, dma_handle, npages);
sglist++;
}
/* Flush/invalidate TLBs if necessary */
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
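
For context, the pci_map_sg()-level wrapper that sits on top of these helpers (not shown in this excerpt) would drive them roughly as follows; get_iommu_table() is a placeholder for however the device's table is looked up:

	static int example_pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist,
				      int nelems, int direction)
	{
		struct iommu_table *tbl = get_iommu_table(pdev);	/* placeholder lookup */
		unsigned long handle = 0;

		if (!tbl)
			return 0;

		/* Returns the number of (possibly merged) segments produced, or 0. */
		return iommu_alloc_sg(tbl, sglist, nelems, direction, &handle);
	}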
/*
* arch/ppc64/kernel/pSeries_iommu.c
*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
* Rewrite, cleanup:
*
* Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
*
* Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include "pci.h"
/* Only used to pass OF initialization data set in prom.c into the main
* kernel code -- data ultimately copied into regular tce tables.
*/
extern struct _of_tce_table of_tce_table[];
extern struct pci_controller *hose_head;
extern struct pci_controller **hose_tail;
static void tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
int direction)
{
union tce_entry t;
union tce_entry *tp;
t.te_word = 0;
t.te_rdwr = 1; // Read allowed
if (direction != PCI_DMA_TODEVICE)
t.te_pciwr = 1;
tp = ((union tce_entry *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross LMB boundary */
t.te_rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
tp->te_word = t.te_word;
uaddr += PAGE_SIZE;
tp++;
}
}
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
union tce_entry t;
union tce_entry *tp;
t.te_word = 0;
tp = ((union tce_entry *)tbl->it_base) + index;
while (npages--) {
tp->te_word = t.te_word;
tp++;
}
}
static void iommu_buses_init(void)
{
struct pci_controller* phb;
struct device_node *dn, *first_dn;
int num_slots, num_slots_ilog2;
int first_phb = 1;
/* XXX Should we be using pci_root_buses instead? -ojn
*/
for (phb=hose_head; phb; phb=phb->next) {
first_dn = ((struct device_node *)phb->arch_data)->child;
/* Carve 2GB into the largest dma_window_size possible */
for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
num_slots++;
num_slots_ilog2 = __ilog2(num_slots);
if ((1<<num_slots_ilog2) != num_slots)
num_slots_ilog2++;
phb->dma_window_size = 1 << (22 - num_slots_ilog2);
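		/* e.g. five child slots round up to eight windows, giving
		 * dma_window_size = 1 << 19, i.e. 256MB of DMA space per IOA
		 * out of the 2GB total. */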
/* Reserve 16MB of DMA space on the first PHB.
* We should probably be more careful and use firmware props.
* In reality this space is remapped, not lost. But we don't
* want to get that smart to handle it -- too much work.
*/
phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
first_phb = 0;
for (dn = first_dn; dn != NULL; dn = dn->sibling)
iommu_devnode_init(dn);
}
}
static void iommu_buses_init_lpar(struct list_head *bus_list)
{
struct list_head *ln;
struct pci_bus *bus;
struct device_node *busdn;
unsigned int *dma_window;
for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
bus = pci_bus_b(ln);
busdn = PCI_GET_DN(bus);
dma_window = (unsigned int *)get_property(busdn, "ibm,dma-window", 0);
if (dma_window) {
/* Bussubno hasn't been copied yet.
* Do it now because iommu_table_setparms_lpar needs it.
*/
busdn->bussubno = bus->number;
iommu_devnode_init(busdn);
}
/* look for a window on a bridge even if the PHB had one */
iommu_buses_init_lpar(&bus->children);
}
}
static void iommu_table_setparms(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl)
{
phandle node;
unsigned long i;
struct _of_tce_table *oft;
node = ((struct device_node *)(phb->arch_data))->node;
oft = NULL;
for (i=0; of_tce_table[i].node; i++)
if(of_tce_table[i].node == node) {
oft = &of_tce_table[i];
break;
}
if (!oft)
panic("PCI_DMA: iommu_table_setparms: Can't find phb named '%s' in of_tce_table\n", dn->full_name);
memset((void *)oft->base, 0, oft->size);
tbl->it_busno = phb->bus->number;
/* Units of tce entries */
tbl->it_offset = phb->dma_window_base_cur;
/* Adjust the current table offset to the next
* region. Measured in TCE entries. Force an
* alignment to the size allotted per IOA. This
* makes it easier to remove the 1st 16MB.
*/
phb->dma_window_base_cur += (phb->dma_window_size>>3);
phb->dma_window_base_cur &=
~((phb->dma_window_size>>3)-1);
/* Set the tce table size - measured in pages */
tbl->it_size = ((phb->dma_window_base_cur -
tbl->it_offset) << 3) >> PAGE_SHIFT;
/* Test if we are going over 2GB of DMA space */
if (phb->dma_window_base_cur > (1 << 19))
panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
tbl->it_base = oft->base;
tbl->it_index = 0;
tbl->it_entrysize = sizeof(union tce_entry);
tbl->it_blocksize = 16;
}
/*
* iommu_table_setparms_lpar
*
* Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
*
* ToDo: properly interpret the ibm,dma-window property. The definition is:
* logical-bus-number (1 word)
* phys-address (#address-cells words)
* size (#cell-size words)
*
* Currently we hard code these sizes (more or less).
*/
static void iommu_table_setparms_lpar(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl)
{
unsigned int *dma_window;
dma_window = (unsigned int *)get_property(dn, "ibm,dma-window", 0);
if (!dma_window)
panic("iommu_table_setparms_lpar: device %s has no"
" ibm,dma-window property!\n", dn->full_name);
tbl->it_busno = dn->bussubno;
tbl->it_size = (((((unsigned long)dma_window[4] << 32) |
(unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
tbl->it_offset = ((((unsigned long)dma_window[2] << 32) |
(unsigned long)dma_window[3]) >> 12);
tbl->it_base = 0;
tbl->it_index = dma_window[0];
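	/* i.e. with the fixed layout assumed by the indexing above: cell 0 is
	 * the logical bus number (used as the TCE put index), cells 2-3 form
	 * the 64-bit window offset and cells 4-5 the 64-bit window size. */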
tbl->it_entrysize = sizeof(union tce_entry);
tbl->it_blocksize = 16;
}
void iommu_devnode_init(struct device_node *dn)
{
struct iommu_table *tbl;
tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
GFP_KERNEL);
if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
iommu_table_setparms_lpar(dn->phb, dn, tbl);
else
iommu_table_setparms(dn->phb, dn, tbl);
dn->iommu_table = iommu_init_table(tbl);
}
void iommu_setup_pSeries(void)
{
struct pci_dev *dev = NULL;
struct device_node *dn, *mydn;
if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
iommu_buses_init_lpar(&pci_root_buses);
else
iommu_buses_init();
/* Now copy the iommu_table ptr from the bus devices down to every
* pci device_node. This means get_iommu_table() won't need to search
* up the device tree to find it.
*/
while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
mydn = dn = PCI_GET_DN(dev);
while (dn && dn->iommu_table == NULL)
dn = dn->parent;
if (dn)
mydn->iommu_table = dn->iommu_table;
}
}
/* These are called very early. */
void tce_init_pSeries(void)
{
ppc_md.tce_build = tce_build_pSeries;
ppc_md.tce_free = tce_free_pSeries;
pci_iommu_init();
}
@@ -29,7 +29,7 @@
 #include <asm/abs_addr.h>
 #include <asm/mmu_context.h>
 #include <asm/ppcdebug.h>
-#include <asm/pci_dma.h>
+#include <asm/iommu.h>
 #include <linux/pci.h>
 #include <asm/naca.h>
 #include <asm/tlbflush.h>
@@ -122,51 +122,59 @@ long plpar_put_term_char(unsigned long termno,
 		      lbuf[1]);
 }
 
-static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum,
-				unsigned long uaddr, int direction )
+static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages,
+				unsigned long uaddr, int direction )
 {
-	u64 set_tce_rc;
-	union Tce tce;
-
-	PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
-	PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
-	       tcenum, tbl, tbl->index);
-
-	tce.wholeTce = 0;
-	tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
-
-	tce.tceBits.readWrite = 1;
-	if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1;
-
-	set_tce_rc = plpar_tce_put((u64)tbl->index,
-				   (u64)tcenum << 12,
-				   tce.wholeTce );
-
-	if(set_tce_rc) {
-		printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", set_tce_rc);
-		printk("\tindex = 0x%lx\n", (u64)tbl->index);
-		printk("\ttcenum = 0x%lx\n", (u64)tcenum);
-		printk("\ttce val = 0x%lx\n", tce.wholeTce );
+	u64 rc;
+	union tce_entry tce;
+
+	tce.te_word = 0;
+	tce.te_rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
+	tce.te_rdwr = 1;
+	if (direction != PCI_DMA_TODEVICE)
+		tce.te_pciwr = 1;
+
+	while (npages--) {
+		rc = plpar_tce_put((u64)tbl->it_index,
+				   (u64)tcenum << 12,
+				   tce.te_word );
+
+		if (rc && printk_ratelimit()) {
+			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
+			printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
+			printk("\ttcenum = 0x%lx\n", (u64)tcenum);
+			printk("\ttce val = 0x%lx\n", tce.te_word );
+			show_stack(current, (unsigned long *)__get_SP());
+		}
+
+		tcenum++;
+		tce.te_rpn++;
 	}
 }
 
-static void tce_free_one_pSeriesLP(struct TceTable *tbl, long tcenum)
+static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
 {
-	u64 set_tce_rc;
-	union Tce tce;
-
-	tce.wholeTce = 0;
-	set_tce_rc = plpar_tce_put((u64)tbl->index,
-				   (u64)tcenum << 12,
-				   tce.wholeTce );
-	if ( set_tce_rc ) {
-		printk("tce_free_one_pSeriesLP: plpar_tce_put failed\n");
-		printk("\trc = %ld\n", set_tce_rc);
-		printk("\tindex = 0x%lx\n", (u64)tbl->index);
-		printk("\ttcenum = 0x%lx\n", (u64)tcenum);
-		printk("\ttce val = 0x%lx\n", tce.wholeTce );
+	u64 rc;
+	union tce_entry tce;
+
+	tce.te_word = 0;
+
+	while (npages--) {
+		rc = plpar_tce_put((u64)tbl->it_index,
+				   (u64)tcenum << 12,
+				   tce.te_word );
+
+		if (rc && printk_ratelimit()) {
+			printk("tce_free_pSeriesLP: plpar_tce_put failed\n");
+			printk("\trc = %ld\n", rc);
+			printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
+			printk("\ttcenum = 0x%lx\n", (u64)tcenum);
+			printk("\ttce val = 0x%lx\n", tce.te_word );
+			show_stack(current, (unsigned long *)__get_SP());
+		}
+
+		tcenum++;
 	}
 }
 
 int vtermno;	/* virtual terminal# for udbg */
@@ -298,8 +306,10 @@ void pSeriesLP_init_early(void)
 	tce_init_pSeries();
 
 	ppc_md.tce_build = tce_build_pSeriesLP;
-	ppc_md.tce_free_one = tce_free_one_pSeriesLP;
+	ppc_md.tce_free = tce_free_pSeriesLP;
+
+	pci_iommu_init();
 
 #ifdef CONFIG_SMP
 	smp_init_pSeries();
...
@@ -39,7 +39,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppcdebug.h>
 #include <asm/naca.h>
-#include <asm/pci_dma.h>
+#include <asm/iommu.h>
 #include "open_pic.h"
 #include "pci.h"
@@ -699,7 +699,7 @@ void __init pSeries_final_fixup(void)
 	phbs_fixup_io();
 	chrp_request_regions();
 	pci_fix_bus_sysdata();
-	create_tce_tables();
+	iommu_setup_pSeries();
 }
 
 /***********************************************************************
...
@@ -33,7 +33,7 @@
 #include <asm/uaccess.h>
 #include <asm/ppcdebug.h>
 #include <asm/naca.h>
-#include <asm/pci_dma.h>
+#include <asm/iommu.h>
 #include <asm/machdep.h>
 #include "pci.h"
...
/*
* pci_dma.c
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
* Dynamic DMA mapping support.
*
* Manages the TCE space assigned to this partition.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/ppcdebug.h>
#include <asm/iSeries/HvCallXm.h>
#include <asm/iSeries/LparData.h>
#include <asm/pci_dma.h>
#include <asm/pci-bridge.h>
#include <asm/iSeries/iSeries_pci.h>
#include <asm/machdep.h>
#include "pci.h"
/* #define DEBUG_TCE 1 */
/* #define MONITOR_TCE 1 */ /* Turn on to sanity check TCE generation. */
#ifdef CONFIG_PPC_PSERIES
/* Initialize so this guy does not end up in the BSS section.
* Only used to pass OF initialization data set in prom.c into the main
* kernel code -- data ultimately copied into tceTables[].
*/
extern struct _of_tce_table of_tce_table[];
#endif
extern struct pci_controller* hose_head;
extern struct pci_controller** hose_tail;
extern struct list_head iSeries_Global_Device_List;
struct TceTable virtBusVethTceTable; /* Tce table for virtual ethernet */
struct TceTable virtBusVioTceTable; /* Tce table for virtual I/O */
struct iSeries_Device_Node iSeries_veth_dev_node = { .LogicalSlot = 0xFF, .DevTceTable = &virtBusVethTceTable };
struct iSeries_Device_Node iSeries_vio_dev_node = { .LogicalSlot = 0xFF, .DevTceTable = &virtBusVioTceTable };
struct pci_dev iSeries_veth_dev_st = { .sysdata = &iSeries_veth_dev_node };
struct pci_dev iSeries_vio_dev_st = { .sysdata = &iSeries_vio_dev_node, .dev.bus = &pci_bus_type };
struct pci_dev * iSeries_veth_dev = &iSeries_veth_dev_st;
struct device * iSeries_vio_dev = &iSeries_vio_dev_st.dev;
/* Device TceTable is stored in Device Node */
/* struct TceTable * tceTables[256]; */ /* Tce tables for 256 busses
* Bus 255 is the virtual bus
* zero indicates no bus defined
*/
/* allocates a contiguous range of tces (power-of-2 size) */
static inline long alloc_tce_range(struct TceTable *,
unsigned order );
/* allocates a contiguous range of tces (power-of-2 size)
* assumes lock already held
*/
static long alloc_tce_range_nolock(struct TceTable *,
unsigned order );
/* frees a contiguous range of tces (power-of-2 size) */
static inline void free_tce_range(struct TceTable *,
long tcenum,
unsigned order );
/* frees a contiguous range of tces (power-of-2 size)
* assumes lock already held
*/
void free_tce_range_nolock(struct TceTable *,
long tcenum,
unsigned order );
/* allocates a range of tces and sets them to the pages */
inline dma_addr_t get_tces( struct TceTable *,
unsigned order,
void *page,
unsigned numPages,
int direction );
static long test_tce_range( struct TceTable *,
long tcenum,
unsigned order );
static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
struct TceTable *tce_table_parms );
static void getTceTableParmsPSeries( struct pci_controller *phb,
struct device_node *dn,
struct TceTable *tce_table_parms );
static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
struct device_node *dn,
struct TceTable *newTceTable );
static struct TceTable* findHwTceTable(struct TceTable * newTceTable );
void create_pci_bus_tce_table( unsigned long token );
u8 iSeries_Get_Bus( struct pci_dev * dv )
{
return 0;
}
static inline struct TceTable *get_tce_table(struct pci_dev *dev)
{
if (!dev)
dev = ppc64_isabridge_dev;
if (!dev)
return NULL;
if (systemcfg->platform == PLATFORM_ISERIES_LPAR) {
return ISERIES_DEVNODE(dev)->DevTceTable;
} else {
return PCI_GET_DN(dev)->tce_table;
}
}
static unsigned long __inline__ count_leading_zeros64( unsigned long x )
{
unsigned long lz;
asm("cntlzd %0,%1" : "=r"(lz) : "r"(x));
return lz;
}
#ifdef CONFIG_PPC_ISERIES
static void tce_build_iSeries(struct TceTable *tbl, long tcenum,
unsigned long uaddr, int direction )
{
u64 setTceRc;
union Tce tce;
PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
tcenum, tbl, tbl->index);
tce.wholeTce = 0;
tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
/* If for virtual bus */
if ( tbl->tceType == TCE_VB ) {
tce.tceBits.valid = 1;
tce.tceBits.allIo = 1;
if ( direction != PCI_DMA_TODEVICE )
tce.tceBits.readWrite = 1;
} else {
/* If for PCI bus */
tce.tceBits.readWrite = 1; // Read allowed
if ( direction != PCI_DMA_TODEVICE )
tce.tceBits.pciWrite = 1;
}
setTceRc = HvCallXm_setTce((u64)tbl->index,
(u64)tcenum,
tce.wholeTce );
if(setTceRc) {
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", setTceRc);
}
}
#endif
#ifdef CONFIG_PPC_PSERIES
static void tce_build_pSeries(struct TceTable *tbl, long tcenum,
unsigned long uaddr, int direction )
{
union Tce tce;
union Tce *tce_addr;
PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
tcenum, tbl, tbl->index);
tce.wholeTce = 0;
tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
tce.tceBits.readWrite = 1; // Read allowed
if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1;
tce_addr = ((union Tce *)tbl->base) + tcenum;
*tce_addr = (union Tce)tce.wholeTce;
}
#endif
/*
* Build a TceTable structure. This contains a multi-level bit map which
* is used to manage allocation of the tce space.
*/
struct TceTable *build_tce_table( struct TceTable * tbl )
{
unsigned long bits, bytes, totalBytes;
unsigned long numBits[NUM_TCE_LEVELS], numBytes[NUM_TCE_LEVELS];
unsigned i, k, m;
unsigned char * pos, * p, b;
PPCDBG(PPCDBG_TCEINIT, "build_tce_table: tbl = 0x%lx\n", tbl);
spin_lock_init( &(tbl->lock) );
tbl->mlbm.maxLevel = 0;
/* Compute number of bits and bytes for each level of the
* multi-level bit map
*/
totalBytes = 0;
bits = tbl->size * (PAGE_SIZE / sizeof( union Tce ));
for ( i=0; i<NUM_TCE_LEVELS; ++i ) {
bytes = ((bits+63)/64) * 8;
PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d bits=%ld, bytes=%ld\n", i, bits, bytes );
numBits[i] = bits;
numBytes[i] = bytes;
bits /= 2;
totalBytes += bytes;
}
PPCDBG(PPCDBG_TCEINIT, "build_tce_table: totalBytes=%ld\n", totalBytes );
pos = (char *)__get_free_pages( GFP_ATOMIC, get_order( totalBytes ));
if ( pos == NULL ) {
panic("PCI_DMA: Allocation failed in build_tce_table!\n");
}
/* For each level, fill in the pointer to the bit map,
* and turn on the last bit in the bit map (if the
* number of bits in the map is odd). The highest
* level will get all of its bits turned on.
*/
memset( pos, 0, totalBytes );
for (i=0; i<NUM_TCE_LEVELS; ++i) {
if ( numBytes[i] ) {
tbl->mlbm.level[i].map = pos;
tbl->mlbm.maxLevel = i;
if ( numBits[i] & 1 ) {
p = pos + numBytes[i] - 1;
m = (( numBits[i] % 8) - 1) & 7;
*p = 0x80 >> m;
PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d last bit %x\n", i, 0x80>>m );
}
}
else
tbl->mlbm.level[i].map = 0;
pos += numBytes[i];
tbl->mlbm.level[i].numBits = numBits[i];
tbl->mlbm.level[i].numBytes = numBytes[i];
}
/* For the highest level, turn on all the bits */
i = tbl->mlbm.maxLevel;
p = tbl->mlbm.level[i].map;
m = numBits[i];
PPCDBG(PPCDBG_TCEINIT, "build_tce_table: highest level (%d) has all bits set\n", i);
for (k=0; k<numBytes[i]; ++k) {
if ( m >= 8 ) {
/* handle full bytes */
*p++ = 0xff;
m -= 8;
}
else if(m>0) {
/* handle the last partial byte */
b = 0x80;
*p = 0;
while (m) {
*p |= b;
b >>= 1;
--m;
}
} else {
break;
}
}
return tbl;
}
static inline long alloc_tce_range( struct TceTable *tbl, unsigned order )
{
long retval;
unsigned long flags;
/* Lock the tce allocation bitmap */
spin_lock_irqsave( &(tbl->lock), flags );
/* Do the actual work */
retval = alloc_tce_range_nolock( tbl, order );
/* Unlock the tce allocation bitmap */
spin_unlock_irqrestore( &(tbl->lock), flags );
return retval;
}
static long alloc_tce_range_nolock( struct TceTable *tbl, unsigned order )
{
unsigned long numBits, numBytes;
unsigned long i, bit, block, mask;
long tcenum;
u64 * map;
/* If the order (power of 2 size) requested is larger than our
* biggest, indicate failure
*/
if(order >= NUM_TCE_LEVELS) {
/* This can happen if block of TCE's are not found. This code */
/* maybe in a recursive loop looking up the bit map for the range.*/
panic("PCI_DMA: alloc_tce_range_nolock: invalid order: %d\n",order);
}
numBits = tbl->mlbm.level[order].numBits;
numBytes = tbl->mlbm.level[order].numBytes;
map = (u64 *)tbl->mlbm.level[order].map;
/* Initialize return value to -1 (failure) */
tcenum = -1;
/* Loop through the bytes of the bitmap */
for (i=0; i<numBytes/8; ++i) {
if ( *map ) {
/* A free block is found, compute the block
* number (of this size)
*/
bit = count_leading_zeros64( *map );
block = (i * 64) + bit; /* Bit count to free entry */
/* turn off the bit in the map to indicate
* that the block is now in use
*/
mask = 0x1UL << (63 - bit);
*map &= ~mask;
/* compute the index into our tce table for
* the first tce in the block
*/
PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: allocating block %ld, (byte=%ld, bit=%ld) order %d\n", block, i, bit, order );
tcenum = block << order;
return tcenum;
}
++map;
}
#ifdef DEBUG_TCE
if ( tcenum == -1 ) {
PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: no available blocks of order = %d\n", order );
if ( order < tbl->mlbm.maxLevel ) {
PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: trying next bigger size\n" );
}
else {
panic("PCI_DMA: alloc_tce_range_nolock: maximum size reached...failing\n");
}
}
#endif
/* If no block of the requested size was found, try the next
* size bigger. If one of those is found, return the second
* half of the block to freespace and keep the first half
*/
if((tcenum == -1) && (order < (NUM_TCE_LEVELS - 1))) {
tcenum = alloc_tce_range_nolock( tbl, order+1 );
if ( tcenum != -1 ) {
free_tce_range_nolock( tbl, tcenum+(1<<order), order );
}
}
/* Return the index of the first tce in the block
* (or -1 if we failed)
*/
return tcenum;
}
static inline void free_tce_range(struct TceTable *tbl,
long tcenum, unsigned order )
{
unsigned long flags;
/* Lock the tce allocation bitmap */
spin_lock_irqsave( &(tbl->lock), flags );
/* Do the actual work */
free_tce_range_nolock( tbl, tcenum, order );
/* Unlock the tce allocation bitmap */
spin_unlock_irqrestore( &(tbl->lock), flags );
}
void free_tce_range_nolock(struct TceTable *tbl,
long tcenum, unsigned order )
{
unsigned long block;
unsigned byte, bit, mask, b;
unsigned char * map, * bytep;
if (order >= NUM_TCE_LEVELS) {
panic("PCI_DMA: free_tce_range: invalid order: 0x%x\n",order);
return;
}
block = tcenum >> order;
#ifdef MONITOR_TCE
if ( tcenum != (block << order ) ) {
printk("PCI_DMA: Free_tce_range: tcenum %lx misaligned for order %x\n",tcenum, order);
return;
}
if ( block >= tbl->mlbm.level[order].numBits ) {
printk("PCI_DMA: Free_tce_range: tcenum %lx is outside the range of this map (order %x, numBits %lx\n",
tcenum, order, tbl->mlbm.level[order].numBits );
return;
}
if ( test_tce_range( tbl, tcenum, order ) ) {
printk("PCI_DMA: Freeing range not allocated: tTceTable %p, tcenum %lx, order %x\n",tbl, tcenum, order );
return;
}
#endif
map = tbl->mlbm.level[order].map;
byte = block / 8;
bit = block % 8;
mask = 0x80 >> bit;
bytep = map + byte;
#ifdef DEBUG_TCE
PPCDBG(PPCDBG_TCE,"free_tce_range_nolock: freeing block %ld (byte=%d, bit=%d) of order %d\n",
block, byte, bit, order);
#endif
#ifdef MONITOR_TCE
if ( *bytep & mask ) {
panic("PCI_DMA: Tce already free: TceTable %p, tcenum %lx, order %x\n",tbl,tcenum,order);
}
#endif
*bytep |= mask;
/* If there is a higher level in the bit map than this we may be
* able to buddy up this block with its partner.
* If this is the highest level we can't buddy up
* If this level has an odd number of bits and
* we are freeing the last block we can't buddy up
* Don't buddy up if it's in the first 1/4 of the level
*/
if (( order < tbl->mlbm.maxLevel ) &&
( block > (tbl->mlbm.level[order].numBits/4) ) &&
(( block < tbl->mlbm.level[order].numBits-1 ) ||
( 0 == ( tbl->mlbm.level[order].numBits & 1)))) {
/* See if we can buddy up the block we just freed */
bit &= 6; /* get to the first of the buddy bits */
mask = 0xc0 >> bit; /* build two bit mask */
b = *bytep & mask; /* Get the two bits */
if ( 0 == (b ^ mask) ) { /* If both bits are on */
/* both of the buddy blocks are free we can combine them */
*bytep ^= mask; /* turn off the two bits */
block = ( byte * 8 ) + bit; /* block of first of buddies */
tcenum = block << order;
/* free the buddied block */
PPCDBG(PPCDBG_TCE,
"free_tce_range: buddying blocks %ld & %ld\n",
block, block+1);
free_tce_range_nolock( tbl, tcenum, order+1 );
}
}
}
static long test_tce_range( struct TceTable *tbl, long tcenum, unsigned order )
{
unsigned long block;
unsigned byte, bit, mask, b;
long retval, retLeft, retRight;
unsigned char * map;
map = tbl->mlbm.level[order].map;
block = tcenum >> order;
byte = block / 8; /* Byte within bitmap */
bit = block % 8; /* Bit within byte */
mask = 0x80 >> bit;
b = (*(map+byte) & mask ); /* 0 if block is allocated, else free */
if ( b )
retval = 1; /* 1 == block is free */
else
retval = 0; /* 0 == block is allocated */
/* Test bits at all levels below this to ensure that all agree */
if (order) {
retLeft = test_tce_range( tbl, tcenum, order-1 );
retRight = test_tce_range( tbl, tcenum+(1<<(order-1)), order-1 );
if ( retLeft || retRight ) {
retval = 2;
}
}
/* Test bits at all levels above this to ensure that all agree */
return retval;
}
inline dma_addr_t get_tces( struct TceTable *tbl, unsigned order, void *page, unsigned numPages, int direction )
{
long tcenum;
unsigned long uaddr;
unsigned i;
dma_addr_t retTce = NO_TCE;
uaddr = (unsigned long)page & PAGE_MASK;
/* Allocate a range of tces */
tcenum = alloc_tce_range( tbl, order );
if ( tcenum != -1 ) {
/* We got the tces we wanted */
tcenum += tbl->startOffset; /* Offset into real TCE table */
retTce = tcenum << PAGE_SHIFT; /* Set the return dma address */
/* Setup a tce for each page */
for (i=0; i<numPages; ++i) {
ppc_md.tce_build(tbl, tcenum, uaddr, direction);
++tcenum;
uaddr += PAGE_SIZE;
}
/* Make sure the update is visible to hardware.
sync required to synchronize the update to
the TCE table with the MMIO that will send
the bus address to the IOA */
__asm__ __volatile__ ("sync" : : : "memory");
}
else {
panic("PCI_DMA: Tce Allocation failure in get_tces. 0x%p\n",tbl);
}
return retTce;
}
#ifdef CONFIG_PPC_ISERIES
void tce_free_one_iSeries( struct TceTable *tbl, long tcenum )
{
u64 set_tce_rc;
union Tce tce;
tce.wholeTce = 0;
set_tce_rc = HvCallXm_setTce((u64)tbl->index,
(u64)tcenum,
tce.wholeTce);
if ( set_tce_rc )
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", set_tce_rc);
}
#endif
#ifdef CONFIG_PPC_PSERIES
static void tce_free_one_pSeries( struct TceTable *tbl, long tcenum )
{
union Tce tce;
union Tce *tce_addr;
tce.wholeTce = 0;
tce_addr = ((union Tce *)tbl->base) + tcenum;
*tce_addr = (union Tce)tce.wholeTce;
}
#endif
void tce_free(struct TceTable *tbl, dma_addr_t dma_addr,
unsigned order, unsigned num_pages)
{
long tcenum, total_tces, free_tce;
unsigned i;
total_tces = (tbl->size * (PAGE_SIZE / sizeof(union Tce)));
tcenum = dma_addr >> PAGE_SHIFT;
free_tce = tcenum - tbl->startOffset;
if ( ( (free_tce + num_pages) > total_tces ) ||
( tcenum < tbl->startOffset ) ) {
printk("tce_free: invalid tcenum\n");
printk("\ttcenum = 0x%lx\n", tcenum);
printk("\tTCE Table = 0x%lx\n", (u64)tbl);
printk("\tbus# = 0x%lx\n", (u64)tbl->busNumber );
printk("\tsize = 0x%lx\n", (u64)tbl->size);
printk("\tstartOff = 0x%lx\n", (u64)tbl->startOffset );
printk("\tindex = 0x%lx\n", (u64)tbl->index);
return;
}
for (i=0; i<num_pages; ++i) {
ppc_md.tce_free_one(tbl, tcenum);
++tcenum;
}
/* No sync (to make TCE change visible) is required here.
The lwsync when acquiring the lock in free_tce_range
is sufficient to synchronize with the bitmap.
*/
free_tce_range( tbl, free_tce, order );
}
#ifdef CONFIG_PPC_ISERIES
void __init create_virtual_bus_tce_table(void)
{
struct TceTable *t;
struct TceTableManagerCB virtBusTceTableParms;
u64 absParmsPtr;
virtBusTceTableParms.busNumber = 255; /* Bus 255 is the virtual bus */
virtBusTceTableParms.virtualBusFlag = 0xff; /* Ask for virtual bus */
absParmsPtr = virt_to_absolute( (u64)&virtBusTceTableParms );
HvCallXm_getTceTableParms( absParmsPtr );
virtBusVethTceTable.size = virtBusTceTableParms.size / 2;
virtBusVethTceTable.busNumber = virtBusTceTableParms.busNumber;
virtBusVethTceTable.startOffset = virtBusTceTableParms.startOffset;
virtBusVethTceTable.index = virtBusTceTableParms.index;
virtBusVethTceTable.tceType = TCE_VB;
virtBusVioTceTable.size = virtBusTceTableParms.size - virtBusVethTceTable.size;
virtBusVioTceTable.busNumber = virtBusTceTableParms.busNumber;
virtBusVioTceTable.startOffset = virtBusTceTableParms.startOffset +
virtBusVethTceTable.size * (PAGE_SIZE/sizeof(union Tce));
virtBusVioTceTable.index = virtBusTceTableParms.index;
virtBusVioTceTable.tceType = TCE_VB;
t = build_tce_table( &virtBusVethTceTable );
if ( t ) {
/* tceTables[255] = t; */
//VirtBusVethTceTable = t;
printk( "Virtual Bus VETH TCE table built successfully.\n");
printk( " TCE table size = %ld entries\n",
(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
printk( " TCE table token = %d\n",
(unsigned)t->index );
printk( " TCE table start entry = 0x%lx\n",
(unsigned long)t->startOffset );
}
else printk( "Virtual Bus VETH TCE table failed.\n");
t = build_tce_table( &virtBusVioTceTable );
if ( t ) {
//VirtBusVioTceTable = t;
printk( "Virtual Bus VIO TCE table built successfully.\n");
printk( " TCE table size = %ld entries\n",
(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
printk( " TCE table token = %d\n",
(unsigned)t->index );
printk( " TCE table start entry = 0x%lx\n",
(unsigned long)t->startOffset );
}
else printk( "Virtual Bus VIO TCE table failed.\n");
}
#endif
void create_tce_tables_for_buses(struct list_head *bus_list)
{
struct pci_controller* phb;
struct device_node *dn, *first_dn;
int num_slots, num_slots_ilog2;
int first_phb = 1;
for (phb=hose_head;phb;phb=phb->next) {
first_dn = ((struct device_node *)phb->arch_data)->child;
/* Carve 2GB into the largest dma_window_size possible */
for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
num_slots++;
num_slots_ilog2 = __ilog2(num_slots);
if ((1<<num_slots_ilog2) != num_slots)
num_slots_ilog2++;
phb->dma_window_size = 1 << (22 - num_slots_ilog2);
/* Reserve 16MB of DMA space on the first PHB.
* We should probably be more careful and use firmware props.
* In reality this space is remapped, not lost. But we don't
* want to get that smart to handle it -- too much work.
*/
phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
first_phb = 0;
for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling) {
create_pci_bus_tce_table((unsigned long)dn);
}
}
}
#ifdef CONFIG_PPC_PSERIES
void create_tce_tables_for_busesLP(struct list_head *bus_list)
{
struct list_head *ln;
struct pci_bus *bus;
struct device_node *busdn;
u32 *dma_window;
for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
bus = pci_bus_b(ln);
busdn = PCI_GET_DN(bus);
dma_window = (u32 *)get_property(busdn, "ibm,dma-window", 0);
if (dma_window) {
/* Bussubno hasn't been copied yet.
* Do it now because getTceTableParmsPSeriesLP needs it.
*/
busdn->bussubno = bus->number;
create_pci_bus_tce_table((unsigned long)busdn);
}
/* look for a window on a bridge even if the PHB had one */
create_tce_tables_for_busesLP(&bus->children);
}
}
#endif
void create_tce_tables(void) {
struct pci_dev *dev = NULL;
struct device_node *dn, *mydn;
#ifdef CONFIG_PPC_PSERIES
if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
create_tce_tables_for_busesLP(&pci_root_buses);
}
else
#endif
{
create_tce_tables_for_buses(&pci_root_buses);
}
/* Now copy the tce_table ptr from the bus devices down to every
* pci device_node. This means get_tce_table() won't need to search
* up the device tree to find it.
*/
while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
mydn = dn = PCI_GET_DN(dev);
while (dn && dn->tce_table == NULL)
dn = dn->parent;
if (dn) {
mydn->tce_table = dn->tce_table;
}
}
}
/*
* iSeries token = iSeries_device_Node*
* pSeries token = pci_controller*
*
*/
void create_pci_bus_tce_table( unsigned long token ) {
struct TceTable * newTceTable;
PPCDBG(PPCDBG_TCE, "Entering create_pci_bus_tce_table.\n");
PPCDBG(PPCDBG_TCE, "\ttoken = 0x%lx\n", token);
newTceTable = (struct TceTable *)kmalloc( sizeof(struct TceTable), GFP_KERNEL );
/*****************************************************************/
/* For the iSeries machines, the HvTce Table can be one of three */
/* flavors, */
/* - Single bus TCE table, */
/* - Tce Table Share between buses, */
/* - Tce Table per logical slot. */
/*****************************************************************/
if(systemcfg->platform == PLATFORM_ISERIES_LPAR) {
struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)token;
getTceTableParmsiSeries(DevNode,newTceTable);
/* Look for existing TCE table for this device. */
DevNode->DevTceTable = findHwTceTable(newTceTable );
if( DevNode->DevTceTable == NULL) {
DevNode->DevTceTable = build_tce_table( newTceTable );
}
else {
/* We're using a shared table, free this new one. */
kfree(newTceTable);
}
printk("Pci Device 0x%p TceTable: %p\n",DevNode,DevNode->DevTceTable);
return;
}
/* pSeries Leg */
else {
struct device_node *dn;
struct pci_controller *phb;
dn = (struct device_node *)token;
phb = dn->phb;
if (systemcfg->platform == PLATFORM_PSERIES)
getTceTableParmsPSeries(phb, dn, newTceTable);
else
getTceTableParmsPSeriesLP(phb, dn, newTceTable);
dn->tce_table = build_tce_table( newTceTable );
}
}
/***********************************************************************/
/* This function compares the known Tce tables to find a TceTable that */
/* has already been built for hardware TCEs. */
/* Search the complete(all devices) for a TCE table assigned. If the */
/* startOffset, index, and size match, then the TCE for this device has*/
/* already been built and it should be shared with this device */
/***********************************************************************/
static struct TceTable* findHwTceTable(struct TceTable * newTceTable )
{
#ifdef CONFIG_PPC_ISERIES
struct list_head* Device_Node_Ptr = iSeries_Global_Device_List.next;
/* Cache the compare values. */
u64 startOffset = newTceTable->startOffset;
u64 index = newTceTable->index;
u64 size = newTceTable->size;
while(Device_Node_Ptr != &iSeries_Global_Device_List) {
struct iSeries_Device_Node* CmprNode = (struct iSeries_Device_Node*)Device_Node_Ptr;
if( CmprNode->DevTceTable != NULL &&
CmprNode->DevTceTable->tceType == TCE_PCI) {
if( CmprNode->DevTceTable->startOffset == startOffset &&
CmprNode->DevTceTable->index == index &&
CmprNode->DevTceTable->size == size ) {
printk("PCI TCE table matches 0x%p \n",CmprNode->DevTceTable);
return CmprNode->DevTceTable;
}
}
/* Get next Device Node in List */
Device_Node_Ptr = Device_Node_Ptr->next;
}
#endif
return NULL;
}
/***********************************************************************/
/* Call Hv with the architected data structure to get TCE table info. */
/* info. Put the returned data into the Linux representation of the */
/* TCE table data. */
/* The Hardware Tce table comes in three flavors. */
/* 1. TCE table shared between Buses. */
/* 2. TCE table per Bus. */
/* 3. TCE Table per IOA. */
/***********************************************************************/
static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
struct TceTable* newTceTable )
{
#ifdef CONFIG_PPC_ISERIES
struct TceTableManagerCB* pciBusTceTableParms = (struct TceTableManagerCB*)kmalloc( sizeof(struct TceTableManagerCB), GFP_KERNEL );
if(pciBusTceTableParms == NULL) panic("PCI_DMA: TCE Table Allocation failed.");
memset( (void*)pciBusTceTableParms,0,sizeof(struct TceTableManagerCB) );
pciBusTceTableParms->busNumber = ISERIES_BUS(DevNode);
pciBusTceTableParms->logicalSlot = DevNode->LogicalSlot;
pciBusTceTableParms->virtualBusFlag = 0;
HvCallXm_getTceTableParms( REALADDR(pciBusTceTableParms) );
/* PciTceTableParms Bus:0x18 Slot:0x04 Start:0x000000 Offset:0x04c000 Size:0x0020 */
printk("PciTceTableParms Bus:0x%02lx Slot:0x%02x Start:0x%06lx Offset:0x%06lx Size:0x%04lx\n",
pciBusTceTableParms->busNumber,
pciBusTceTableParms->logicalSlot,
pciBusTceTableParms->start,
pciBusTceTableParms->startOffset,
pciBusTceTableParms->size);
if(pciBusTceTableParms->size == 0) {
printk("PCI_DMA: Possible Structure mismatch, 0x%p\n",pciBusTceTableParms);
panic( "PCI_DMA: pciBusTceTableParms->size is zero, halt here!");
}
newTceTable->size = pciBusTceTableParms->size;
newTceTable->busNumber = pciBusTceTableParms->busNumber;
newTceTable->startOffset = pciBusTceTableParms->startOffset;
newTceTable->index = pciBusTceTableParms->index;
newTceTable->tceType = TCE_PCI;
kfree(pciBusTceTableParms);
#endif
}
static void getTceTableParmsPSeries(struct pci_controller *phb,
struct device_node *dn,
struct TceTable *newTceTable ) {
#ifdef CONFIG_PPC_PSERIES
phandle node;
unsigned long i;
node = ((struct device_node *)(phb->arch_data))->node;
PPCDBG(PPCDBG_TCEINIT, "getTceTableParms: start\n");
PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table = 0x%lx\n", of_tce_table);
PPCDBG(PPCDBG_TCEINIT, "\tphb = 0x%lx\n", phb);
PPCDBG(PPCDBG_TCEINIT, "\tdn = 0x%lx\n", dn);
PPCDBG(PPCDBG_TCEINIT, "\tdn->name = %s\n", dn->name);
PPCDBG(PPCDBG_TCEINIT, "\tdn->full_name= %s\n", dn->full_name);
PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable = 0x%lx\n", newTceTable);
PPCDBG(PPCDBG_TCEINIT, "\tdma_window_size = 0x%lx\n", phb->dma_window_size);
i = 0;
while(of_tce_table[i].node) {
PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].node = 0x%lx\n",
i, of_tce_table[i].node);
PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].base = 0x%lx\n",
i, of_tce_table[i].base);
PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].size = 0x%lx\n",
i, of_tce_table[i].size >> PAGE_SHIFT);
PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data->node = 0x%lx\n",
node);
if(of_tce_table[i].node == node) {
memset((void *)of_tce_table[i].base,
0, of_tce_table[i].size);
newTceTable->busNumber = phb->bus->number;
/* Units of tce entries. */
newTceTable->startOffset = phb->dma_window_base_cur;
/* Adjust the current table offset to the next */
/* region. Measured in TCE entries. Force an */
/* alignment to the size allotted per IOA. This */
/* makes it easier to remove the 1st 16MB. */
phb->dma_window_base_cur += (phb->dma_window_size>>3);
phb->dma_window_base_cur &=
~((phb->dma_window_size>>3)-1);
/* Set the tce table size - measured in units */
/* of pages of tce table. */
newTceTable->size = ((phb->dma_window_base_cur -
newTceTable->startOffset) << 3)
>> PAGE_SHIFT;
/* Test if we are going over 2GB of DMA space. */
if(phb->dma_window_base_cur > (1 << 19)) {
panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
}
newTceTable->base = of_tce_table[i].base;
newTceTable->index = 0;
PPCDBG(PPCDBG_TCEINIT,
"\tnewTceTable->base = 0x%lx\n",
newTceTable->base);
PPCDBG(PPCDBG_TCEINIT,
"\tnewTceTable->startOffset = 0x%lx"
"(# tce entries)\n",
newTceTable->startOffset);
PPCDBG(PPCDBG_TCEINIT,
"\tnewTceTable->size = 0x%lx"
"(# pages of tce table)\n",
newTceTable->size);
}
i++;
}
#endif
}
/*
* getTceTableParmsPSeriesLP
*
* Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
*
* ToDo: properly interpret the ibm,dma-window property. The definition is:
* logical-bus-number (1 word)
* phys-address (#address-cells words)
* size (#cell-size words)
*
* Currently we hard code these sizes (more or less).
*/
static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
struct device_node *dn,
struct TceTable *newTceTable ) {
#ifdef CONFIG_PPC_PSERIES
u32 *dma_window = (u32 *)get_property(dn, "ibm,dma-window", 0);
if (!dma_window) {
panic("PCI_DMA: getTceTableParmsPSeriesLP: device %s has no ibm,dma-window property!\n", dn->full_name);
}
newTceTable->busNumber = dn->bussubno;
newTceTable->size = (((((unsigned long)dma_window[4] << 32) | (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
newTceTable->startOffset = ((((unsigned long)dma_window[2] << 32) | (unsigned long)dma_window[3]) >> 12);
newTceTable->base = 0;
newTceTable->index = dma_window[0];
PPCDBG(PPCDBG_TCEINIT, "getTceTableParmsPSeriesLP for bus 0x%lx:\n", dn->bussubno);
PPCDBG(PPCDBG_TCEINIT, "\tDevice = %s\n", dn->full_name);
PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->index = 0x%lx\n", newTceTable->index);
PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->startOffset = 0x%lx\n", newTceTable->startOffset);
PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->size = 0x%lx\n", newTceTable->size);
#endif
}
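/*
 * Illustrative sketch only, not part of this patch: the word layout that
 * the hard-coded interpretation above ends up using for ibm,dma-window.
 * The helper name and out-parameters are made up for this example; a
 * proper fix would derive the cell counts from #address-cells and
 * #size-cells as the ToDo above says.
 */
static void example_read_dma_window(struct device_node *dn,
				    unsigned long *index,
				    unsigned long *offset,
				    unsigned long *size)
{
	u32 *dma_window = (u32 *)get_property(dn, "ibm,dma-window", 0);

	if (!dma_window)
		return;

	/* word 0: logical-bus-number, used as the table index */
	*index = dma_window[0];
	/* words 2-3: start (offset) of the DMA window, as read above */
	*offset = ((unsigned long)dma_window[2] << 32) | dma_window[3];
	/* words 4-5: size of the DMA window in bytes, as read above */
	*size = ((unsigned long)dma_window[4] << 32) | dma_window[5];
}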
/* Allocates a contiguous real buffer and creates TCEs over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (tce) of the first page.
*/
static void *tce_alloc_consistent(struct pci_dev *hwdev, size_t size,
dma_addr_t *dma_handle)
{
struct TceTable * tbl;
void *ret = NULL;
unsigned order, nPages;
dma_addr_t tce;
PPCDBG(PPCDBG_TCE, "pci_alloc_consistent:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx\n", hwdev);
PPCDBG(PPCDBG_TCE, "\tsize = 0x%16.16lx\n", size);
PPCDBG(PPCDBG_TCE, "\tdma_handle = 0x%16.16lx\n", dma_handle);
size = PAGE_ALIGN(size);
order = get_order(size);
nPages = 1 << order;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: pci_alloc_consistent size too large: 0x%lx \n",size);
return (void *)NO_TCE;
}
tbl = get_tce_table(hwdev);
if ( tbl ) {
/* Alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages( GFP_ATOMIC, order );
if ( ret ) {
/* Page allocation succeeded */
memset(ret, 0, nPages << PAGE_SHIFT);
/* Set up tces to cover the allocated range */
tce = get_tces( tbl, order, ret, nPages, PCI_DMA_BIDIRECTIONAL );
if ( tce == NO_TCE ) {
PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tces failed\n" );
free_pages( (unsigned long)ret, order );
ret = NULL;
}
else
{
*dma_handle = tce;
}
}
else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: __get_free_pages failed for order = %d\n", order);
}
else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tce_table failed for 0x%016lx\n", hwdev);
PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle);
PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: return = 0x%16.16lx\n", ret);
return ret;
}
static void tce_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
struct TceTable * tbl;
unsigned order, nPages;
PPCDBG(PPCDBG_TCE, "pci_free_consistent:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, dma_handle, vaddr);
size = PAGE_ALIGN(size);
order = get_order(size);
nPages = 1 << order;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: pci_free_consistent size too large: 0x%lx \n",size);
return;
}
tbl = get_tce_table(hwdev);
if ( tbl ) {
tce_free(tbl, dma_handle, order, nPages);
free_pages( (unsigned long)vaddr, order );
}
}
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address of the buffer
* passed here is the kernel (virtual) address of the buffer. The buffer
* need not be page aligned, the dma_addr_t returned will point to the same
* byte within the page as vaddr.
*/
static dma_addr_t tce_map_single(struct pci_dev *hwdev, void *vaddr,
size_t size, int direction )
{
struct TceTable * tbl;
dma_addr_t dma_handle = NO_TCE;
unsigned long uaddr;
unsigned order, nPages;
PPCDBG(PPCDBG_TCE, "pci_map_single:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, direction, vaddr);
if ( direction == PCI_DMA_NONE )
BUG();
uaddr = (unsigned long)vaddr;
nPages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK );
order = get_order( nPages & PAGE_MASK );
nPages >>= PAGE_SHIFT;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: pci_map_single size too large: 0x%lx \n",size);
return NO_TCE;
}
tbl = get_tce_table(hwdev);
if ( tbl ) {
dma_handle = get_tces( tbl, order, vaddr, nPages, direction );
dma_handle |= ( uaddr & ~PAGE_MASK );
}
return dma_handle;
}
static void tce_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction )
{
struct TceTable * tbl;
unsigned order, nPages;
PPCDBG(PPCDBG_TCE, "pci_unmap_single:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, dma_handle = 0x%16.16lx\n", hwdev, size, direction, dma_handle);
if ( direction == PCI_DMA_NONE )
BUG();
nPages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK );
order = get_order( nPages & PAGE_MASK );
nPages >>= PAGE_SHIFT;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: pci_unmap_single 0x%lx size too"
" large: 0x%lx \n", (long)dma_handle, (long)size);
return;
}
tbl = get_tce_table(hwdev);
if ( tbl )
tce_free(tbl, dma_handle, order, nPages);
}
#if 0
/* Figure out how many TCEs are actually going to be required
* to map this scatterlist. This code is not optimal. It
* takes into account the case where entry n ends in the same
* page in which entry n+1 starts. It does not handle the
* general case of entry n ending in the same page in which
* entry m starts.
*/
static unsigned long num_tces_sg( struct scatterlist *sg, int nents )
{
unsigned long nTces, numPages, startPage, endPage, prevEndPage;
unsigned i;
prevEndPage = 0;
nTces = 0;
for (i=0; i<nents; ++i) {
/* Compute the starting page number and
* the ending page number for this entry
*/
startPage = (unsigned long)sg->address >> PAGE_SHIFT;
endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
numPages = endPage - startPage + 1;
/* Simple optimization: if the previous entry ended
* in the same page in which this entry starts
* then we can reduce the required pages by one.
* This matches assumptions in fill_scatterlist_sg and
* create_tces_sg
*/
if ( startPage == prevEndPage )
--numPages;
nTces += numPages;
prevEndPage = endPage;
sg++;
}
return nTces;
}
/* Fill in the dma data in the scatterlist
* return the number of dma sg entries created
*/
static unsigned fill_scatterlist_sg( struct scatterlist *sg, int nents,
dma_addr_t dma_addr , unsigned long numTces)
{
struct scatterlist *dma_sg;
u32 cur_start_dma;
unsigned long cur_len_dma, cur_end_virt, uaddr;
unsigned num_dma_ents;
dma_sg = sg;
num_dma_ents = 1;
/* Process the first sg entry */
cur_start_dma = dma_addr + ((unsigned long)sg->address & (~PAGE_MASK));
cur_len_dma = sg->length;
/* cur_end_virt holds the address of the byte immediately after the
* end of the current buffer.
*/
cur_end_virt = (unsigned long)sg->address + cur_len_dma;
/* Later code assumes that unused sg->dma_address and sg->dma_length
* fields will be zero. Other archs seem to assume that the user
* (device driver) guarantees that...I don't want to depend on that
*/
sg->dma_address = sg->dma_length = 0;
/* Process the rest of the sg entries */
while (--nents) {
++sg;
/* Clear possibly unused fields. Note: sg >= dma_sg so
* this can't be clearing a field we've already set
*/
sg->dma_address = sg->dma_length = 0;
/* Check if it is possible to make this next entry
* contiguous (in dma space) with the previous entry.
*/
/* The entries can be contiguous in dma space if
* the previous entry ends immediately before the
* start of the current entry (in virtual space)
* or if the previous entry ends at a page boundary
* and the current entry starts at a page boundary.
*/
uaddr = (unsigned long)sg->address;
if ( ( uaddr != cur_end_virt ) &&
( ( ( uaddr | cur_end_virt ) & (~PAGE_MASK) ) ||
( ( uaddr & PAGE_MASK ) == ( ( cur_end_virt-1 ) & PAGE_MASK ) ) ) ) {
/* This entry can not be contiguous in dma space.
* save the previous dma entry and start a new one
*/
dma_sg->dma_address = cur_start_dma;
dma_sg->dma_length = cur_len_dma;
++dma_sg;
++num_dma_ents;
cur_start_dma += cur_len_dma-1;
/* If the previous entry ends and this entry starts
* in the same page then they share a tce. In that
* case don't bump cur_start_dma to the next page
* in dma space. This matches assumptions made in
* num_tces_sg and create_tces_sg.
*/
if ((uaddr & PAGE_MASK) == ((cur_end_virt-1) & PAGE_MASK))
cur_start_dma &= PAGE_MASK;
else
cur_start_dma = PAGE_ALIGN(cur_start_dma+1);
cur_start_dma += ( uaddr & (~PAGE_MASK) );
cur_len_dma = 0;
}
/* Accumulate the length of this entry for the next
* dma entry
*/
cur_len_dma += sg->length;
cur_end_virt = uaddr + sg->length;
}
/* Fill in the last dma entry */
dma_sg->dma_address = cur_start_dma;
dma_sg->dma_length = cur_len_dma;
if ((((cur_start_dma +cur_len_dma - 1)>> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1) != numTces)
{
PPCDBG(PPCDBG_TCE, "fill_scatterlist_sg: numTces %ld, used tces %d\n",
numTces,
(unsigned)(((cur_start_dma + cur_len_dma - 1) >> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1));
}
return num_dma_ents;
}
/* Call the hypervisor to create the TCE entries.
* return the number of TCEs created
*/
static dma_addr_t create_tces_sg( struct TceTable *tbl, struct scatterlist *sg,
int nents, unsigned numTces, int direction )
{
unsigned order, i, j;
unsigned long startPage, endPage, prevEndPage, numPages, uaddr;
long tcenum, starttcenum;
dma_addr_t dmaAddr;
dmaAddr = NO_TCE;
order = get_order( numTces << PAGE_SHIFT );
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: create_tces_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT));
return NO_TCE;
}
/* allocate a block of tces */
tcenum = alloc_tce_range( tbl, order );
if ( tcenum != -1 ) {
tcenum += tbl->startOffset;
starttcenum = tcenum;
dmaAddr = tcenum << PAGE_SHIFT;
prevEndPage = 0;
for (j=0; j<nents; ++j) {
startPage = (unsigned long)sg->address >> PAGE_SHIFT;
endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
numPages = endPage - startPage + 1;
uaddr = (unsigned long)sg->address;
/* If the previous entry ended in the same page that
* the current page starts then they share that
* tce and we reduce the number of tces we need
* by one. This matches assumptions made in
* num_tces_sg and fill_scatterlist_sg
*/
if ( startPage == prevEndPage ) {
--numPages;
uaddr += PAGE_SIZE;
}
for (i=0; i<numPages; ++i) {
ppc_md.tce_build(tbl, tcenum, uaddr, direction);
++tcenum;
uaddr += PAGE_SIZE;
}
prevEndPage = endPage;
sg++;
}
/* Make sure the update is visible to hardware.
sync required to synchronize the update to
the TCE table with the MMIO that will send
the bus address to the IOA */
__asm__ __volatile__ ("sync" : : : "memory");
if ((tcenum - starttcenum) != numTces)
PPCDBG(PPCDBG_TCE, "create_tces_sg: numTces %d, tces used %d\n",
numTces, (unsigned)(tcenum - starttcenum));
}
return dmaAddr;
}
static int tce_map_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction )
{
struct TceTable * tbl;
unsigned numTces;
int num_dma;
dma_addr_t dma_handle;
PPCDBG(PPCDBG_TCE, "pci_map_sg:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nents = 0x%16.16lx\n", hwdev, sg, direction, nents);
/* Fast path for a single entry scatterlist */
if ( nents == 1 ) {
sg->dma_address = pci_map_single( hwdev, sg->address,
sg->length, direction );
sg->dma_length = sg->length;
return 1;
}
if ( direction == PCI_DMA_NONE )
BUG();
tbl = get_tce_table(hwdev);
if ( tbl ) {
/* Compute the number of tces required */
numTces = num_tces_sg( sg, nents );
/* Create the tces and get the dma address */
dma_handle = create_tces_sg( tbl, sg, nents, numTces, direction );
/* Fill in the dma scatterlist */
num_dma = fill_scatterlist_sg( sg, nents, dma_handle, numTces );
}
return num_dma;
}
static void tce_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction )
{
struct TceTable * tbl;
unsigned order, numTces, i;
dma_addr_t dma_end_page, dma_start_page;
PPCDBG(PPCDBG_TCE, "pci_unmap_sg:\n");
PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nelms = 0x%16.16lx\n", hwdev, sg, direction, nelms);
if ( direction == PCI_DMA_NONE || nelms == 0 )
BUG();
dma_start_page = sg->dma_address & PAGE_MASK;
dma_end_page = 0;
for ( i=nelms; i>0; --i ) {
unsigned k = i - 1;
if ( sg[k].dma_length ) {
dma_end_page = ( sg[k].dma_address +
sg[k].dma_length - 1 ) & PAGE_MASK;
break;
}
}
numTces = ((dma_end_page - dma_start_page ) >> PAGE_SHIFT) + 1;
order = get_order( numTces << PAGE_SHIFT );
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: dma_start_page:0x%lx dma_end_page:0x%lx\n",dma_start_page,dma_end_page);
printk("PCI_DMA: pci_unmap_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT));
return;
}
tbl = get_tce_table(hwdev);
if ( tbl )
tce_free( tbl, dma_start_page, order, numTces );
}
#else
static int tce_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
int direction)
{
int i;
for (i = 0; i < nelems; i++) {
void *vaddr = page_address(sglist->page) + sglist->offset;
sglist->dma_address = pci_map_single(pdev, vaddr,
sglist->length,
direction);
sglist->dma_length = sglist->length;
sglist++;
}
return nelems;
}
static void tce_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
int direction)
{
while (nelems--) {
pci_unmap_single(pdev, sglist->dma_address,
sglist->dma_length, direction);
sglist++;
}
}
#endif
#ifdef CONFIG_PPC_PSERIES
/* These are called very early. */
void tce_init_pSeries(void)
{
ppc_md.tce_build = tce_build_pSeries;
ppc_md.tce_free_one = tce_free_one_pSeries;
pci_dma_ops.pci_alloc_consistent = tce_alloc_consistent;
pci_dma_ops.pci_free_consistent = tce_free_consistent;
pci_dma_ops.pci_map_single = tce_map_single;
pci_dma_ops.pci_unmap_single = tce_unmap_single;
pci_dma_ops.pci_map_sg = tce_map_sg;
pci_dma_ops.pci_unmap_sg = tce_unmap_sg;
}
#endif
#ifdef CONFIG_PPC_ISERIES
void tce_init_iSeries(void)
{
ppc_md.tce_build = tce_build_iSeries;
ppc_md.tce_free_one = tce_free_one_iSeries;
pci_dma_ops.pci_alloc_consistent = tce_alloc_consistent;
pci_dma_ops.pci_free_consistent = tce_free_consistent;
pci_dma_ops.pci_map_single = tce_map_single;
pci_dma_ops.pci_unmap_single = tce_unmap_single;
pci_dma_ops.pci_map_sg = tce_map_sg;
pci_dma_ops.pci_unmap_sg = tce_unmap_sg;
}
#endif
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#include <asm/pci-bridge.h> #include <asm/pci-bridge.h>
#include <asm/ppcdebug.h> #include <asm/ppcdebug.h>
#include <asm/naca.h> #include <asm/naca.h>
#include <asm/pci_dma.h> #include <asm/iommu.h>
#include "pci.h" #include "pci.h"
......
/*
* arch/ppc64/kernel/pci_iommu.c
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
* Rewrite, cleanup, new allocation schemes:
* Copyright (C) 2004 Olof Johansson, IBM Corporation
*
* Dynamic DMA mapping support, platform-independent parts.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include "pci.h"
#ifdef CONFIG_PPC_ISERIES
#include <asm/iSeries/iSeries_pci.h>
#endif /* CONFIG_PPC_ISERIES */
#define DBG(...)
static inline struct iommu_table *devnode_table(struct pci_dev *dev)
{
if (!dev)
dev = ppc64_isabridge_dev;
if (!dev)
return NULL;
#ifdef CONFIG_PPC_ISERIES
return ISERIES_DEVNODE(dev)->iommu_table;
#endif /* CONFIG_PPC_ISERIES */
#ifdef CONFIG_PPC_PSERIES
return PCI_GET_DN(dev)->iommu_table;
#endif /* CONFIG_PPC_PSERIES */
}
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
*/
void *pci_iommu_alloc_consistent(struct pci_dev *hwdev, size_t size,
dma_addr_t *dma_handle)
{
struct iommu_table *tbl;
void *ret = NULL;
dma_addr_t mapping;
unsigned int npages, order;
size = PAGE_ALIGN(size);
npages = size >> PAGE_SHIFT;
order = get_order(size);
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if (order >= IOMAP_MAX_ORDER) {
printk("PCI_DMA: pci_alloc_consistent size too large: 0x%lx\n",
size);
return (void *)NO_TCE;
}
tbl = devnode_table(hwdev);
if (!tbl)
return NULL;
/* Alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(GFP_ATOMIC, order);
if (!ret)
return NULL;
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
mapping = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL, NULL);
/* Make sure the update is visible to hardware. */
mb();
if (mapping == NO_TCE) {
free_pages((unsigned long)ret, order);
ret = NULL;
} else
*dma_handle = mapping;
return ret;
}
void pci_iommu_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
struct iommu_table *tbl;
unsigned int npages;
size = PAGE_ALIGN(size);
npages = size >> PAGE_SHIFT;
tbl = devnode_table(hwdev);
if (tbl) {
iommu_free(tbl, dma_handle, npages);
free_pages((unsigned long)vaddr, get_order(size));
}
}
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address of the buffer
* passed here is the kernel (virtual) address of the buffer. The buffer
* need not be page aligned, the dma_addr_t returned will point to the same
* byte within the page as vaddr.
*/
dma_addr_t pci_iommu_map_single(struct pci_dev *hwdev, void *vaddr,
size_t size, int direction)
{
struct iommu_table * tbl;
dma_addr_t dma_handle = NO_TCE;
unsigned long uaddr;
unsigned int npages;
unsigned long handle = 0;
BUG_ON(direction == PCI_DMA_NONE);
uaddr = (unsigned long)vaddr;
npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
npages >>= PAGE_SHIFT;
tbl = devnode_table(hwdev);
if (tbl) {
dma_handle = iommu_alloc(tbl, vaddr, npages, direction, &handle);
if (dma_handle == NO_TCE) {
if (printk_ratelimit()) {
printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %p npages %d\n",
tbl, vaddr, npages);
}
} else
dma_handle |= (uaddr & ~PAGE_MASK);
}
mb();
return dma_handle;
}
void pci_iommu_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_handle,
size_t size, int direction)
{
struct iommu_table *tbl;
unsigned int npages;
BUG_ON(direction == PCI_DMA_NONE);
npages = (PAGE_ALIGN(dma_handle + size) - (dma_handle & PAGE_MASK))
>> PAGE_SHIFT;
tbl = devnode_table(hwdev);
if (tbl)
iommu_free(tbl, dma_handle, npages);
}
int pci_iommu_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
int direction)
{
struct iommu_table * tbl;
unsigned long handle;
BUG_ON(direction == PCI_DMA_NONE);
if (nelems == 0)
return 0;
tbl = devnode_table(pdev);
if (!tbl)
return 0;
handle = 0;
return iommu_alloc_sg(tbl, sglist, nelems, direction, &handle);
}
void pci_iommu_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
int direction)
{
struct iommu_table *tbl;
BUG_ON(direction == PCI_DMA_NONE);
tbl = devnode_table(pdev);
if (!tbl)
return;
iommu_free_sg(tbl, sglist, nelems, direction);
}
/* We support DMA to/from any memory page via the iommu */
static int pci_iommu_dma_supported(struct pci_dev *pdev, u64 mask)
{
return 1;
}
void pci_iommu_init(void)
{
pci_dma_ops.pci_alloc_consistent = pci_iommu_alloc_consistent;
pci_dma_ops.pci_free_consistent = pci_iommu_free_consistent;
pci_dma_ops.pci_map_single = pci_iommu_map_single;
pci_dma_ops.pci_unmap_single = pci_iommu_unmap_single;
pci_dma_ops.pci_map_sg = pci_iommu_map_sg;
pci_dma_ops.pci_unmap_sg = pci_iommu_unmap_sg;
pci_dma_ops.pci_dma_supported = pci_iommu_dma_supported;
}
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/io.h> #include <asm/io.h>
#include <asm/pci-bridge.h> #include <asm/pci-bridge.h>
#include <asm/pci_dma.h> #include <asm/iommu.h>
#include <asm/machdep.h> #include <asm/machdep.h>
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/bootx.h> #include <asm/bootx.h>
...@@ -181,8 +181,9 @@ void __init pmac_setup_arch(void) ...@@ -181,8 +181,9 @@ void __init pmac_setup_arch(void)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
pmac_setup_smp(); pmac_setup_smp();
#endif #endif
/* Setup the PCI DMA to "direct" for now, until we have proper
* DART support and can deal with more than 2Gb of RAM /* Setup the PCI DMA to "direct" by default. May be overridden
* by iommu later on
*/ */
pci_dma_init_direct(); pci_dma_init_direct();
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/naca.h> #include <asm/naca.h>
#include <asm/pci.h> #include <asm/pci.h>
#include <asm/pci_dma.h> #include <asm/iommu.h>
#include <asm/bootinfo.h> #include <asm/bootinfo.h>
#include <asm/ppcdebug.h> #include <asm/ppcdebug.h>
#include <asm/btext.h> #include <asm/btext.h>
...@@ -3003,15 +3003,15 @@ static int of_finish_dynamic_node(struct device_node *node) ...@@ -3003,15 +3003,15 @@ static int of_finish_dynamic_node(struct device_node *node)
node->devfn = (regs[0] >> 8) & 0xff; node->devfn = (regs[0] >> 8) & 0xff;
} }
/* fixing up tce_table */ /* fixing up iommu_table */
if(strcmp(node->name, "pci") == 0 && if(strcmp(node->name, "pci") == 0 &&
get_property(node, "ibm,dma-window", NULL)) { get_property(node, "ibm,dma-window", NULL)) {
node->bussubno = node->busno; node->bussubno = node->busno;
create_pci_bus_tce_table((unsigned long)node); iommu_devnode_init(node);
} }
else else
node->tce_table = parent->tce_table; node->iommu_table = parent->iommu_table;
out: out:
of_node_put(parent); of_node_put(parent);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <asm/rtas.h> #include <asm/rtas.h>
#include <asm/pci_dma.h> #include <asm/iommu.h>
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/ppcdebug.h> #include <asm/ppcdebug.h>
#include <asm/vio.h> #include <asm/vio.h>
...@@ -29,12 +29,7 @@ ...@@ -29,12 +29,7 @@
#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) #define DBGENTER() pr_debug("%s entered\n", __FUNCTION__)
extern struct TceTable *build_tce_table(struct TceTable *tbl); struct iommu_table *vio_build_iommu_table(struct vio_dev *dev);
extern dma_addr_t get_tces(struct TceTable *, unsigned order,
void *page, unsigned numPages, int direction);
extern void tce_free(struct TceTable *tbl, dma_addr_t dma_addr,
unsigned order, unsigned num_pages);
static int vio_num_address_cells; static int vio_num_address_cells;
static struct vio_dev *vio_bus_device; /* fake "parent" device */ static struct vio_dev *vio_bus_device; /* fake "parent" device */
...@@ -240,7 +235,7 @@ struct vio_dev * __devinit vio_register_device(struct device_node *of_node) ...@@ -240,7 +235,7 @@ struct vio_dev * __devinit vio_register_device(struct device_node *of_node)
viodev->archdata = (void *)of_node_get(of_node); viodev->archdata = (void *)of_node_get(of_node);
viodev->unit_address = *unit_address; viodev->unit_address = *unit_address;
viodev->tce_table = vio_build_tce_table(viodev); viodev->iommu_table = vio_build_iommu_table(viodev);
viodev->irq = NO_IRQ; viodev->irq = NO_IRQ;
irq_p = (unsigned int *)get_property(of_node, "interrupts", 0); irq_p = (unsigned int *)get_property(of_node, "interrupts", 0);
...@@ -296,16 +291,16 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) ...@@ -296,16 +291,16 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
EXPORT_SYMBOL(vio_get_attribute); EXPORT_SYMBOL(vio_get_attribute);
/** /**
* vio_build_tce_table: - gets the dma information from OF and builds the TCE tree. * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree.
* @dev: the virtual device. * @dev: the virtual device.
* *
* Returns a pointer to the built tce tree, or NULL if it can't * Returns a pointer to the built tce tree, or NULL if it can't
* find property. * find property.
*/ */
struct TceTable * vio_build_tce_table(struct vio_dev *dev) struct iommu_table * vio_build_iommu_table(struct vio_dev *dev)
{ {
unsigned int *dma_window; unsigned int *dma_window;
struct TceTable *newTceTable; struct iommu_table *newTceTable;
unsigned long offset; unsigned long offset;
unsigned long size; unsigned long size;
int dma_window_property_size; int dma_window_property_size;
...@@ -315,14 +310,14 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev) ...@@ -315,14 +310,14 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev)
return NULL; return NULL;
} }
newTceTable = (struct TceTable *) kmalloc(sizeof(struct TceTable), GFP_KERNEL); newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
/* RPA docs say that #address-cells is always 1 for virtual /* RPA docs say that #address-cells is always 1 for virtual
devices, but some older boxes' OF returns 2. This should devices, but some older boxes' OF returns 2. This should
be removed by GA, unless there are legacy OFs that still be removed by GA, unless there are legacy OFs that still
have 2 for #address-cells */ have 2 for #address-cells */
size = ((dma_window[1+vio_num_address_cells] size = ((dma_window[1+vio_num_address_cells] >> PAGE_SHIFT) << 3)
>> PAGE_SHIFT) << 3) >> PAGE_SHIFT; >> PAGE_SHIFT;
/* This is just an ugly kludge. Remove as soon as the OF for all /* This is just an ugly kludge. Remove as soon as the OF for all
machines actually follow the spec and encodes the offset field machines actually follow the spec and encodes the offset field
...@@ -332,7 +327,7 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev) ...@@ -332,7 +327,7 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev)
} else if (dma_window_property_size == 20) { } else if (dma_window_property_size == 20) {
size = ((dma_window[4] >> PAGE_SHIFT) << 3) >> PAGE_SHIFT; size = ((dma_window[4] >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
} else { } else {
printk(KERN_WARNING "vio_build_tce_table: Invalid size of ibm,my-dma-window=%i, using 0x80 for size\n", dma_window_property_size); printk(KERN_WARNING "vio_build_iommu_table: Invalid size of ibm,my-dma-window=%i, using 0x80 for size\n", dma_window_property_size);
size = 0x80; size = 0x80;
} }
...@@ -342,14 +337,15 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev) ...@@ -342,14 +337,15 @@ struct TceTable * vio_build_tce_table(struct vio_dev *dev)
offset = dma_window[1] >> PAGE_SHIFT; offset = dma_window[1] >> PAGE_SHIFT;
/* TCE table size - measured in units of pages of tce table */ /* TCE table size - measured in units of pages of tce table */
newTceTable->size = size; newTceTable->it_size = size;
/* offset for VIO should always be 0 */ /* offset for VIO should always be 0 */
newTceTable->startOffset = offset; newTceTable->it_offset = offset;
newTceTable->busNumber = 0; newTceTable->it_busno = 0;
newTceTable->index = (unsigned long)dma_window[0]; newTceTable->it_index = (unsigned long)dma_window[0];
newTceTable->tceType = TCE_VB; newTceTable->it_type = TCE_VB;
newTceTable->it_entrysize = sizeof(union tce_entry);
return build_tce_table(newTceTable); return iommu_init_table(newTceTable);
} }
int vio_enable_interrupts(struct vio_dev *dev) int vio_enable_interrupts(struct vio_dev *dev)
...@@ -376,29 +372,21 @@ EXPORT_SYMBOL(vio_disable_interrupts); ...@@ -376,29 +372,21 @@ EXPORT_SYMBOL(vio_disable_interrupts);
dma_addr_t vio_map_single(struct vio_dev *dev, void *vaddr, dma_addr_t vio_map_single(struct vio_dev *dev, void *vaddr,
size_t size, int direction ) size_t size, int direction )
{ {
struct TceTable * tbl; struct iommu_table *tbl;
dma_addr_t dma_handle = NO_TCE; dma_addr_t dma_handle = NO_TCE;
unsigned long uaddr; unsigned long uaddr;
unsigned order, nPages; unsigned int npages;
if(direction == PCI_DMA_NONE) BUG(); BUG_ON(direction == PCI_DMA_NONE);
uaddr = (unsigned long)vaddr; uaddr = (unsigned long)vaddr;
nPages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK ); npages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK );
order = get_order( nPages & PAGE_MASK ); npages >>= PAGE_SHIFT;
nPages >>= PAGE_SHIFT;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("VIO_DMA: vio_map_single size too large: 0x%lx \n",size);
return NO_TCE;
}
tbl = dev->tce_table; tbl = dev->iommu_table;
if(tbl) { if (tbl) {
dma_handle = get_tces(tbl, order, vaddr, nPages, direction); dma_handle = iommu_alloc(tbl, vaddr, npages, direction, NULL);
dma_handle |= (uaddr & ~PAGE_MASK); dma_handle |= (uaddr & ~PAGE_MASK);
} }
...@@ -409,107 +397,92 @@ EXPORT_SYMBOL(vio_map_single); ...@@ -409,107 +397,92 @@ EXPORT_SYMBOL(vio_map_single);
void vio_unmap_single(struct vio_dev *dev, dma_addr_t dma_handle, void vio_unmap_single(struct vio_dev *dev, dma_addr_t dma_handle,
size_t size, int direction) size_t size, int direction)
{ {
struct TceTable * tbl; struct iommu_table * tbl;
unsigned order, nPages; unsigned int npages;
if (direction == PCI_DMA_NONE) BUG();
nPages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK ); BUG_ON(direction == PCI_DMA_NONE);
order = get_order( nPages & PAGE_MASK );
nPages >>= PAGE_SHIFT;
/* Client asked for way too much space. This is checked later anyway */ npages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK );
/* It is easier to debug here for the drivers than in the tce tables.*/ npages >>= PAGE_SHIFT;
if(order >= NUM_TCE_LEVELS) {
printk("VIO_DMA: vio_unmap_single 0x%lx size too large: 0x%lx \n",(unsigned long)dma_handle,(unsigned long)size);
return;
}
tbl = dev->tce_table; tbl = dev->iommu_table;
if(tbl) tce_free(tbl, dma_handle, order, nPages); if(tbl)
iommu_free(tbl, dma_handle, npages);
} }
EXPORT_SYMBOL(vio_unmap_single); EXPORT_SYMBOL(vio_unmap_single);
int vio_map_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems, int vio_map_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems,
int direction) int direction)
{ {
int i; struct iommu_table *tbl;
unsigned long handle;
for (i = 0; i < nelems; i++) {
BUG_ON(direction == PCI_DMA_NONE);
/* 2.4 scsi scatterlists use address field.
Not sure about other subsystems. */ if (nelems == 0)
void *vaddr; return 0;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
if (sglist->address) tbl = vdev->iommu_table;
vaddr = sglist->address; if (!tbl)
else return 0;
#endif
vaddr = page_address(sglist->page) + sglist->offset;
sglist->dma_address = vio_map_single(vdev, vaddr,
sglist->length,
direction);
sglist->dma_length = sglist->length;
sglist++;
}
return nelems; return iommu_alloc_sg(tbl, sglist, nelems, direction, &handle);
} }
EXPORT_SYMBOL(vio_map_sg); EXPORT_SYMBOL(vio_map_sg);
void vio_unmap_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems, void vio_unmap_sg(struct vio_dev *vdev, struct scatterlist *sglist, int nelems,
int direction) int direction)
{ {
while (nelems--) { struct iommu_table *tbl;
vio_unmap_single(vdev, sglist->dma_address,
sglist->dma_length, direction); BUG_ON(direction == PCI_DMA_NONE);
sglist++;
} tbl = vdev->iommu_table;
if (tbl)
iommu_free_sg(tbl, sglist, nelems, direction);
} }
EXPORT_SYMBOL(vio_unmap_sg);
void *vio_alloc_consistent(struct vio_dev *dev, size_t size, void *vio_alloc_consistent(struct vio_dev *dev, size_t size,
dma_addr_t *dma_handle) dma_addr_t *dma_handle)
{ {
struct TceTable * tbl; struct iommu_table * tbl;
void *ret = NULL; void *ret = NULL;
unsigned order, nPages; unsigned int npages, order;
dma_addr_t tce; dma_addr_t tce;
size = PAGE_ALIGN(size); size = PAGE_ALIGN(size);
npages = size >> PAGE_SHIFT;
order = get_order(size); order = get_order(size);
nPages = 1 << order;
/* Client asked for way too much space. This is checked later anyway */ /* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/ /* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) { if(order >= IOMAP_MAX_ORDER) {
printk("VIO_DMA: vio_alloc_consistent size too large: 0x%lx \n",size); printk("VIO_DMA: vio_alloc_consistent size too large: 0x%lx \n", size);
return (void *)NO_TCE; return (void *)NO_TCE;
} }
tbl = dev->tce_table; tbl = dev->iommu_table;
if ( tbl ) { if (tbl) {
/* Alloc enough pages (and possibly more) */ /* Alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages( GFP_ATOMIC, order ); ret = (void *)__get_free_pages(GFP_ATOMIC, order);
if ( ret ) { if (ret) {
/* Page allocation succeeded */ /* Page allocation succeeded */
memset(ret, 0, nPages << PAGE_SHIFT); memset(ret, 0, npages << PAGE_SHIFT);
/* Set up tces to cover the allocated range */ /* Set up tces to cover the allocated range */
tce = get_tces( tbl, order, ret, nPages, PCI_DMA_BIDIRECTIONAL ); tce = iommu_alloc(tbl, ret, npages, PCI_DMA_BIDIRECTIONAL, NULL);
if ( tce == NO_TCE ) { if (tce == NO_TCE) {
PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: get_tces failed\n" ); PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: iommu_alloc failed\n" );
free_pages( (unsigned long)ret, order ); free_pages((unsigned long)ret, order);
ret = NULL; ret = NULL;
} else {
*dma_handle = tce;
} }
else
{
*dma_handle = tce;
}
} }
else PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: __get_free_pages failed for order = %d\n", order); else PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: __get_free_pages failed for size = %d\n", size);
} }
else PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: get_tce_table failed for 0x%016lx\n", dev); else PPCDBG(PPCDBG_TCE, "vio_alloc_consistent: get_iommu_table failed for 0x%016lx\n", dev);
PPCDBG(PPCDBG_TCE, "\tvio_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle); PPCDBG(PPCDBG_TCE, "\tvio_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle);
PPCDBG(PPCDBG_TCE, "\tvio_alloc_consistent: return = 0x%16.16lx\n", ret); PPCDBG(PPCDBG_TCE, "\tvio_alloc_consistent: return = 0x%16.16lx\n", ret);
...@@ -520,28 +493,20 @@ EXPORT_SYMBOL(vio_alloc_consistent); ...@@ -520,28 +493,20 @@ EXPORT_SYMBOL(vio_alloc_consistent);
void vio_free_consistent(struct vio_dev *dev, size_t size, void vio_free_consistent(struct vio_dev *dev, size_t size,
void *vaddr, dma_addr_t dma_handle) void *vaddr, dma_addr_t dma_handle)
{ {
struct TceTable * tbl; struct iommu_table *tbl;
unsigned order, nPages; unsigned int npages;
PPCDBG(PPCDBG_TCE, "vio_free_consistent:\n"); PPCDBG(PPCDBG_TCE, "vio_free_consistent:\n");
PPCDBG(PPCDBG_TCE, "\tdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", dev, size, dma_handle, vaddr); PPCDBG(PPCDBG_TCE, "\tdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", dev, size, dma_handle, vaddr);
size = PAGE_ALIGN(size); size = PAGE_ALIGN(size);
order = get_order(size); npages = size >> PAGE_SHIFT;
nPages = 1 << order;
/* Client asked for way too much space. This is checked later anyway */
/* It is easier to debug here for the drivers than in the tce tables.*/
if(order >= NUM_TCE_LEVELS) {
printk("PCI_DMA: pci_free_consistent size too large: 0x%lx \n",size);
return;
}
tbl = dev->tce_table; tbl = dev->iommu_table;
if ( tbl ) { if ( tbl ) {
tce_free(tbl, dma_handle, order, nPages); iommu_free(tbl, dma_handle, npages);
free_pages( (unsigned long)vaddr, order ); free_pages((unsigned long)vaddr, get_order(size));
} }
} }
EXPORT_SYMBOL(vio_free_consistent); EXPORT_SYMBOL(vio_free_consistent);
......
...@@ -60,10 +60,8 @@ ...@@ -60,10 +60,8 @@
#include <asm/ppcdebug.h> #include <asm/ppcdebug.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/iommu.h>
#ifdef CONFIG_PPC_ISERIES
#include <asm/iSeries/iSeries_dma.h>
#endif
struct mmu_context_queue_t mmu_context_queue; struct mmu_context_queue_t mmu_context_queue;
int mem_init_done; int mem_init_done;
...@@ -885,7 +883,7 @@ void __init mem_init(void) ...@@ -885,7 +883,7 @@ void __init mem_init(void)
mem_init_done = 1; mem_init_done = 1;
#ifdef CONFIG_PPC_ISERIES #ifdef CONFIG_PPC_ISERIES
create_virtual_bus_tce_table(); iommu_vio_init();
#endif #endif
} }
......
/*
* iSeries_dma.h
* Copyright (C) 2001 Mike Corrigan IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ISERIES_DMA_H
#define _ISERIES_DMA_H
#include <asm/types.h>
#include <linux/spinlock.h>
// NUM_TCE_LEVELS defines the largest contiguous block
// of dma (tce) space we can get. NUM_TCE_LEVELS = 10
// allows up to 2**9 pages (512 * 4096) = 2 MB
#define NUM_TCE_LEVELS 10
#define NO_TCE ((dma_addr_t)-1)
// Tces come in two formats, one for the virtual bus and a different
// format for PCI
#define TCE_VB 0
#define TCE_PCI 1
union Tce {
u64 wholeTce;
struct {
u64 cacheBits :6; /* Cache hash bits - not used */
u64 rsvd :6;
u64 rpn :40; /* Absolute page number */
u64 valid :1; /* Tce is valid (vb only) */
u64 allIo :1; /* Tce is valid for all lps (vb only) */
u64 lpIndex :8; /* LpIndex for user of TCE (vb only) */
u64 pciWrite :1; /* Write allowed (pci only) */
u64 readWrite :1; /* Read allowed (pci), Write allowed
(vb) */
} tceBits;
};
struct Bitmap {
unsigned long numBits;
unsigned long numBytes;
unsigned char * map;
};
struct MultiLevelBitmap {
unsigned long maxLevel;
struct Bitmap level[NUM_TCE_LEVELS];
};
struct TceTable {
u64 busNumber;
u64 size;
u64 startOffset;
u64 index;
spinlock_t lock;
struct MultiLevelBitmap mlbm;
};
struct HvTceTableManagerCB {
u64 busNumber; /* Bus number for this tce table */
u64 start; /* Will be NULL for secondary */
u64 totalSize; /* Size (in pages) of whole table */
u64 startOffset; /* Index into real tce table of the
start of our section */
u64 size; /* Size (in pages) of our section */
u64 index; /* Index of this tce table (token?) */
u16 maxTceTableIndex; /* Max number of tables for partition */
u8 virtualBusFlag; /* Flag to indicate virtual bus */
u8 rsvd[5];
};
extern struct TceTable virtBusTceTable; /* Tce table for virtual bus */
extern struct TceTable * build_tce_table( struct HvTceTableManagerCB *,
struct TceTable *);
extern void create_virtual_bus_tce_table( void );
extern void create_pci_bus_tce_table( unsigned busNumber );
#endif /* _ISERIES_DMA_H */
...@@ -92,7 +92,7 @@ struct iSeries_Device_Node { ...@@ -92,7 +92,7 @@ struct iSeries_Device_Node {
int Flags; /* Possible flags(disable/bist)*/ int Flags; /* Possible flags(disable/bist)*/
u16 Vendor; /* Vendor ID */ u16 Vendor; /* Vendor ID */
u8 LogicalSlot; /* Hv Slot Index for Tces */ u8 LogicalSlot; /* Hv Slot Index for Tces */
struct TceTable* DevTceTable; /* Device TCE Table */ struct iommu_table* iommu_table;/* Device TCE Table */
u8 PhbId; /* Phb Card is on. */ u8 PhbId; /* Phb Card is on. */
u16 Board; /* Board Number */ u16 Board; /* Board Number */
u8 FrameId; /* iSeries spcn Frame Id */ u8 FrameId; /* iSeries spcn Frame Id */
......
...@@ -181,9 +181,12 @@ static inline void * phys_to_virt(unsigned long address) ...@@ -181,9 +181,12 @@ static inline void * phys_to_virt(unsigned long address)
*/ */
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#if 0 /* We do NOT want virtual merging, it would put too much pressure on
#define BIO_VMERGE_BOUNDARY 4096 * our iommu allocator. Instead, we want drivers to be smart enough
#endif * to coalesce sglists that happen to have been mapped in a contiguous
* way by the iommu
*/
#define BIO_VMERGE_BOUNDARY 0
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
/*
* iommu.h
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
* Rewrite, cleanup:
* Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _PCI_DMA_H
#define _PCI_DMA_H
#include <asm/types.h>
#include <linux/spinlock.h>
/*
* IOMAP_MAX_ORDER defines the largest contiguous block
* of dma (tce) space we can get. IOMAP_MAX_ORDER = 10
* allows up to 2**9 pages (512 * 4096) = 2 MB
*/
#define IOMAP_MAX_ORDER 10
#define NO_TCE ((dma_addr_t)-1)
/*
* Tces come in two formats, one for the virtual bus and a different
* format for PCI
*/
#define TCE_VB 0
#define TCE_PCI 1
/* tce_entry
* Used by pSeries (SMP) and iSeries/pSeries LPAR, but there it's
* abstracted so layout is irrelevant.
*/
union tce_entry {
unsigned long te_word;
struct {
unsigned int tb_cacheBits :6; /* Cache hash bits - not used */
unsigned int tb_rsvd :6;
unsigned long tb_rpn :40; /* Real page number */
unsigned int tb_valid :1; /* Tce is valid (vb only) */
unsigned int tb_allio :1; /* Tce is valid for all lps (vb only) */
unsigned int tb_lpindex :8; /* LpIndex for user of TCE (vb only) */
unsigned int tb_pciwr :1; /* Write allowed (pci only) */
unsigned int tb_rdwr :1; /* Read allowed (pci), Write allowed (vb) */
} te_bits;
#define te_cacheBits te_bits.tb_cacheBits
#define te_rpn te_bits.tb_rpn
#define te_valid te_bits.tb_valid
#define te_allio te_bits.tb_allio
#define te_lpindex te_bits.tb_lpindex
#define te_pciwr te_bits.tb_pciwr
#define te_rdwr te_bits.tb_rdwr
};
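/*
 * Illustrative sketch, not part of this header: how a PCI TCE entry
 * could be composed from a physical address for a bidirectional
 * mapping, going by the field comments above.  The function name is
 * made up for this example, and PAGE_SHIFT is assumed to come from
 * <asm/page.h>.
 */
static inline unsigned long example_make_pci_tce(unsigned long paddr)
{
	union tce_entry t;

	t.te_word = 0;
	t.te_rpn = paddr >> PAGE_SHIFT;	/* real page number of the target page */
	t.te_rdwr = 1;			/* device reads allowed */
	t.te_pciwr = 1;			/* device writes allowed */
	return t.te_word;
}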
struct iommu_table {
unsigned long it_busno; /* Bus number this table belongs to */
unsigned long it_size; /* Size in pages of iommu table */
unsigned long it_offset; /* Offset into global table */
unsigned long it_base; /* mapped address of tce table */
unsigned long it_index; /* which iommu table this is */
unsigned long it_type; /* type: PCI or Virtual Bus */
unsigned long it_entrysize; /* Size of an entry in bytes */
unsigned long it_blocksize; /* Entries in each block (cacheline) */
unsigned long it_hint; /* Hint for next alloc */
unsigned long it_largehint; /* Hint for large allocs */
spinlock_t it_lock; /* Protects it_map */
unsigned long it_mapsize; /* Size of map in # of entries (bits) */
unsigned long *it_map; /* A simple allocation bitmap for now */
};
#ifdef CONFIG_PPC_ISERIES
struct iommu_table_cb {
unsigned long itc_busno; /* Bus number for this tce table */
unsigned long itc_start; /* Will be NULL for secondary */
unsigned long itc_totalsize; /* Size (in pages) of whole table */
unsigned long itc_offset; /* Index into real tce table of the
start of our section */
unsigned long itc_size; /* Size (in pages) of our section */
unsigned long itc_index; /* Index of this tce table */
unsigned short itc_maxtables; /* Max num of tables for partition */
unsigned char itc_virtbus; /* Flag to indicate virtual bus */
unsigned char itc_slotno; /* IOA Tce Slot Index */
unsigned char itc_rsvd[4];
};
extern struct iommu_table vio_tce_table; /* Tce table for virtual bus */
#endif /* CONFIG_PPC_ISERIES */
struct scatterlist;
#ifdef CONFIG_PPC_PSERIES
/* Walks all buses and creates iommu tables */
extern void iommu_setup_pSeries(void);
extern void iommu_setup_pmac(void);
/* Creates table for an individual device node */
extern void iommu_devnode_init(struct device_node *dn);
#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_ISERIES
/* Walks all buses and creates iommu tables */
extern void iommu_setup_iSeries(void);
/* Initializes tables for vio buses */
extern void __init iommu_vio_init(void);
struct iSeries_Device_Node;
/* Creates table for an individual device node */
extern void iommu_devnode_init(struct iSeries_Device_Node *dn);
#endif /* CONFIG_PPC_ISERIES */
/* Initializes an iommu_table based on values set in the passed-in
* structure
*/
extern struct iommu_table *iommu_init_table(struct iommu_table * tbl);
/* allocates a range of tces and sets them to the pages */
extern dma_addr_t iommu_alloc(struct iommu_table *, void *page,
unsigned int numPages, int direction,
unsigned long *handle);
extern void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages);
/* same with sg lists */
extern int iommu_alloc_sg(struct iommu_table *table, struct scatterlist *sglist,
int nelems, int direction, unsigned long *handle);
extern void iommu_free_sg(struct iommu_table *tbl, struct scatterlist *sglist,
int nelems, int direction);
extern void tce_init_pSeries(void);
extern void tce_init_iSeries(void);
extern void pci_iommu_init(void);
extern void pci_dma_init_direct(void);
#endif
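/*
 * Illustrative sketch, not part of this patch: the rough life cycle of an
 * iommu_table as a bus driver might see it.  The example_* names and the
 * numeric values are placeholders; only the structure fields and the
 * iommu_init_table()/iommu_alloc()/iommu_free() calls come from the
 * interface above, and PCI_DMA_BIDIRECTIONAL is assumed from <linux/pci.h>.
 * Error handling and the platform tce_build/tce_free hooks are omitted.
 */
static struct iommu_table example_tbl;

static void example_setup_and_map(void *vaddr)
{
	dma_addr_t dma;

	/* Describe the DMA window, then let the generic code build the
	 * allocation bitmap and spinlock. */
	example_tbl.it_busno = 0;
	example_tbl.it_base = 0;		/* mapped address of the TCE table */
	example_tbl.it_offset = 0;		/* first entry of our window */
	example_tbl.it_size = 0x80;		/* pages of TCE table (example value) */
	example_tbl.it_index = 0;
	example_tbl.it_type = TCE_PCI;
	example_tbl.it_entrysize = sizeof(union tce_entry);
	iommu_init_table(&example_tbl);

	/* Map a single page for DMA, then tear the mapping down again. */
	dma = iommu_alloc(&example_tbl, vaddr, 1, PCI_DMA_BIDIRECTIONAL, NULL);
	if (dma != NO_TCE)
		iommu_free(&example_tbl, dma, 1);
}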
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
struct pt_regs; struct pt_regs;
struct pci_bus; struct pci_bus;
struct device_node; struct device_node;
struct TceTable; struct iommu_table;
struct rtc_time; struct rtc_time;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -53,12 +53,15 @@ struct machdep_calls { ...@@ -53,12 +53,15 @@ struct machdep_calls {
unsigned long number, unsigned long number,
int local); int local);
void (*tce_build)(struct TceTable * tbl, void (*tce_build)(struct iommu_table * tbl,
long tcenum, long index,
long npages,
unsigned long uaddr, unsigned long uaddr,
int direction); int direction);
void (*tce_free_one)(struct TceTable *tbl, void (*tce_free)(struct iommu_table *tbl,
long tcenum); long index,
long npages);
void (*tce_flush)(struct iommu_table *tbl);
void (*setup_arch)(void); void (*setup_arch)(void);
/* Optional, may be NULL. */ /* Optional, may be NULL. */
......
...@@ -70,6 +70,8 @@ struct pci_dma_ops { ...@@ -70,6 +70,8 @@ struct pci_dma_ops {
int nents, int direction); int nents, int direction);
void (*pci_unmap_sg)(struct pci_dev *hwdev, struct scatterlist *sg, void (*pci_unmap_sg)(struct pci_dev *hwdev, struct scatterlist *sg,
int nents, int direction); int nents, int direction);
int (*pci_dma_supported)(struct pci_dev *hwdev, u64 mask);
int (*pci_dac_dma_supported)(struct pci_dev *hwdev, u64 mask);
}; };
extern struct pci_dma_ops pci_dma_ops; extern struct pci_dma_ops pci_dma_ops;
...@@ -130,10 +132,25 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, ...@@ -130,10 +132,25 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
* be supported properly. For example, if your device can * be supported properly. For example, if your device can
* only drive the low 24-bits during PCI bus mastering, then * only drive the low 24-bits during PCI bus mastering, then
* you would pass 0x00ffffff as the mask to this function. * you would pass 0x00ffffff as the mask to this function.
* We default to supporting only 32-bit DMA unless we have
* an explicit override of this function in pci_dma_ops for
* the platform
*/ */
static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
{ {
return 1; if (pci_dma_ops.pci_dma_supported)
return pci_dma_ops.pci_dma_supported(hwdev, mask);
return (mask < 0x100000000ull);
}
/* For DAC DMA, we currently don't support it by default, but
* we let the platform override this
*/
static inline int pci_dac_dma_supported(struct pci_dev *hwdev,u64 mask)
{
if (pci_dma_ops.pci_dac_dma_supported)
return pci_dma_ops.pci_dac_dma_supported(hwdev, mask);
return 0;
} }
extern int pci_domain_nr(struct pci_bus *bus); extern int pci_domain_nr(struct pci_bus *bus);
...@@ -167,8 +184,6 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, ...@@ -167,8 +184,6 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL)) (((PTR)->LEN_NAME) = (VAL))
#define pci_dac_dma_supported(pci_dev, mask) (0)
/* The PCI address space does equal the physical memory /* The PCI address space does equal the physical memory
* address space. The networking and block device layers use * address space. The networking and block device layers use
* this boolean for bounce buffer decisions. * this boolean for bounce buffer decisions.
......
/*
* pci_dma.h
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _PCI_DMA_H
#define _PCI_DMA_H
#include <asm/types.h>
#include <linux/spinlock.h>
/*
* NUM_TCE_LEVELS defines the largest contiguous block
* of dma (tce) space we can get. NUM_TCE_LEVELS = 10
* allows up to 2**9 pages (512 * 4096) = 2 MB
*/
#define NUM_TCE_LEVELS 10
#define NO_TCE ((dma_addr_t)-1)
/*
* Tces come in two formats, one for the virtual bus and a different
* format for PCI
*/
#define TCE_VB 0
#define TCE_PCI 1
union Tce {
u64 wholeTce;
struct {
u64 cacheBits :6; /* Cache hash bits - not used */
u64 rsvd :6;
u64 rpn :40; /* Absolute page number */
u64 valid :1; /* Tce is valid (vb only) */
u64 allIo :1; /* Tce is valid for all lps (vb only) */
u64 lpIndex :8; /* LpIndex for user of TCE (vb only) */
u64 pciWrite :1; /* Write allowed (pci only) */
u64 readWrite :1; /* Read allowed (pci), Write allowed (vb) */
} tceBits;
};
struct Bitmap {
unsigned long numBits;
unsigned long numBytes;
unsigned char * map;
};
struct MultiLevelBitmap {
unsigned long maxLevel;
struct Bitmap level[NUM_TCE_LEVELS];
};
struct TceTable {
u64 busNumber;
u64 size;
u64 startOffset;
u64 base; /* pSeries native only */
u64 index;
u64 tceType;
spinlock_t lock;
struct MultiLevelBitmap mlbm;
};
struct TceTableManagerCB {
u64 busNumber; /* Bus number for this tce table */
u64 start; /* Will be NULL for secondary */
u64 totalSize; /* Size (in pages) of whole table */
u64 startOffset; /* Index into real tce table of the
start of our section */
u64 size; /* Size (in pages) of our section */
u64 index; /* Index of this tce table (token?) */
u16 maxTceTableIndex; /* Max num of tables for partition */
u8 virtualBusFlag; /* Flag to indicate virtual bus */
u8 logicalSlot; /* IOA Tce Slot Index */
u8 rsvd[4];
};
extern struct TceTable virtBusTceTable; /* Tce table for virtual bus */
extern void create_tce_tables(void);
extern void create_pci_bus_tce_table(unsigned long);
extern void tce_init_pSeries(void);
extern void tce_init_iSeries(void);
extern void pci_dma_init_direct(void);
#endif
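The removed header above described the old allocator: a multi-level bitmap with NUM_TCE_LEVELS = 10 levels, bounding the largest contiguous TCE allocation at 2**9 pages (2 MB with 4 KB pages). The small C sketch below only works through that sizing arithmetic; it assumes the buddy-style reading that level n tracks blocks of 2^n I/O pages, which the header comment implies but does not spell out, and the 4 KB I/O page size is likewise an assumption.

#include <stdio.h>

/* Constants taken from the removed pci_dma.h above. */
#define NUM_TCE_LEVELS 10
#define TCE_PAGE_SIZE  4096UL	/* assumed 4 KB I/O page size */

int main(void)
{
	/* Level n is assumed to cover contiguous blocks of 2^n I/O pages,
	 * so the top level (NUM_TCE_LEVELS - 1) bounds the largest single
	 * allocation the old allocator could hand out. */
	for (int level = 0; level < NUM_TCE_LEVELS; level++) {
		unsigned long pages = 1UL << level;
		printf("level %2d: %4lu pages = %8lu bytes\n",
		       level, pages, pages * TCE_PAGE_SIZE);
	}
	/* Top level: 2^9 = 512 pages = 2 MB, matching the header comment. */
	return 0;
}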
...@@ -134,7 +134,7 @@ struct property { ...@@ -134,7 +134,7 @@ struct property {
* indication of a real PCI node. Other nodes leave these fields zeroed. * indication of a real PCI node. Other nodes leave these fields zeroed.
*/ */
struct pci_controller; struct pci_controller;
struct TceTable; struct iommu_table;
struct device_node { struct device_node {
char *name; char *name;
char *type; char *type;
...@@ -155,7 +155,7 @@ struct device_node { ...@@ -155,7 +155,7 @@ struct device_node {
int eeh_mode; /* See eeh.h for possible EEH_MODEs */ int eeh_mode; /* See eeh.h for possible EEH_MODEs */
int eeh_config_addr; int eeh_config_addr;
struct pci_controller *phb; /* for pci devices */ struct pci_controller *phb; /* for pci devices */
struct TceTable *tce_table; /* for phb's or bridges */ struct iommu_table *iommu_table; /* for phb's or bridges */
struct property *properties; struct property *properties;
struct device_node *parent; struct device_node *parent;
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
struct vio_dev; struct vio_dev;
struct vio_driver; struct vio_driver;
struct vio_device_id; struct vio_device_id;
struct TceTable; struct iommu_table;
int vio_register_driver(struct vio_driver *drv); int vio_register_driver(struct vio_driver *drv);
void vio_unregister_driver(struct vio_driver *drv); void vio_unregister_driver(struct vio_driver *drv);
...@@ -48,7 +48,7 @@ struct vio_dev * __devinit vio_register_device(struct device_node *node_vdev); ...@@ -48,7 +48,7 @@ struct vio_dev * __devinit vio_register_device(struct device_node *node_vdev);
void __devinit vio_unregister_device(struct vio_dev *dev); void __devinit vio_unregister_device(struct vio_dev *dev);
const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length);
int vio_get_irq(struct vio_dev *dev); int vio_get_irq(struct vio_dev *dev);
struct TceTable * vio_build_tce_table(struct vio_dev *dev); struct iommu_table * vio_build_iommu_table(struct vio_dev *dev);
int vio_enable_interrupts(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev);
int vio_disable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev);
...@@ -95,7 +95,7 @@ struct vio_dev { ...@@ -95,7 +95,7 @@ struct vio_dev {
struct device_node *archdata; /* Open Firmware node */ struct device_node *archdata; /* Open Firmware node */
void *driver_data; /* data private to the driver */ void *driver_data; /* data private to the driver */
unsigned long unit_address; unsigned long unit_address;
struct TceTable *tce_table; /* vio_map_* uses this */ struct iommu_table *iommu_table; /* vio_map_* uses this */
unsigned int irq; unsigned int irq;
struct device dev; struct device dev;
......