Commit 3d5134ee authored by Benjamin Herrenschmidt's avatar Benjamin Herrenschmidt Committed by Paul Mackerras

[POWERPC] Rewrite IO allocation & mapping on powerpc64

This rewrites pretty much from scratch the handling of MMIO and PIO
space allocations on powerpc64.  The main goals are:

 - Get rid of imalloc and use more common code where possible
 - Simplify the current mess so that PIO space is allocated and
   mapped in a single place for PCI bridges
 - Handle allocation constraints of PIO for all bridges including
   hot plugged ones within the 2GB space reserved for IO ports,
   so that devices on hotplugged busses will now work with drivers
   that assume IO ports fit in an int.
 - Cleanup and separate tracking of the ISA space in the reserved
   low 64K of IO space. No ISA -> Nothing mapped there.

I booted a cell blade with IDE on PIO and MMIO and a dual G5 so
far, that's it :-)

With this patch, all allocations are done using the code in
mm/vmalloc.c, though we use the low level __get_vm_area with
explicit start/stop constraints in order to manage separate
areas for vmalloc/vmap, ioremap, and PCI IOs.

This greatly simplifies a lot of things, as you can see in the
diffstat of that patch :-)

A new pair of functions pcibios_map/unmap_io_space() now replace
all of the previous code that used to manipulate PCI IOs space.
The allocation is done at mapping time, which is now called from
scan_phb's, just before the devices are probed (instead of after,
which is by itself a bug fix). The only other caller is the PCI
hotplug code for hot adding PCI-PCI bridges (slots).

imalloc is gone, as is the "sub-allocation" thing, but I do believe
that hotplug should still work in the sense that the space allocation
is always done by the PHB, but if you unmap a child bus of this PHB
(which seems to be possible), then the code should properly tear
down all the HPTE mappings for that area of the PHB allocated IO space.

I now always reserve the first 64K of IO space for the bridge with
the ISA bus on it. I have moved the code for tracking ISA in a separate
file which should also make it smarter if we ever are capable of
hot unplugging or re-plugging an ISA bridge.

This should have a side effect on platforms like powermac where VGA IOs
will no longer work. This is done on purpose though as they would have
worked semi-randomly before. The idea at this point is to isolate drivers
that might need to access those and fix them by providing a proper
function to obtain an offset to the legacy IOs of a given bus.
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: default avatarPaul Mackerras <paulus@samba.org>
parent c19c03fc
......@@ -65,7 +65,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
module-$(CONFIG_PPC64) += module_64.o
obj-$(CONFIG_MODULES) += $(module-y)
pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o
pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o isa-bridge.o
pci32-$(CONFIG_PPC32) := pci_32.o
obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y)
obj-$(CONFIG_PCI_MSI) += msi.o
......
/*
* Routines for tracking a legacy ISA bridge
*
* Copyright 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
*
* Some bits and pieces moved over from pci_64.c
*
* Copyright 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define DEBUG
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
/* Base virtual address of the mapped ISA IO space; 0 (NULL) until an
 * ISA bridge has been discovered and mapped.
 */
unsigned long isa_io_base;	/* NULL if no ISA bus */
EXPORT_SYMBOL(isa_io_base);

/* Cached ISA bridge dev. */
static struct device_node *isa_bridge_devnode;
struct pci_dev *isa_bridge_pcidev;
EXPORT_SYMBOL_GPL(isa_bridge_pcidev);

/* Space selector encoding of the ISA address a_hi cell (per the
 * "ISA Binding to 1275" spec): bit 0 set selects IO space.
 */
#define ISA_SPACE_MASK 0x1
#define ISA_SPACE_IO 0x1
/*
 * Parse the "ranges" property of an ISA bridge node and map the first
 * (at most) 64K of the PHB IO space at the fixed ISA_IO_BASE virtual
 * address. On any parsing problem we fall back to blindly mapping the
 * full 64K, which is what legacy drivers expect anyway.
 */
static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
						unsigned long phb_io_base_phys)
{
	/* We should get some saner parsing here and remove these structs */
	struct pci_address {
		u32 a_hi;
		u32 a_mid;
		u32 a_lo;
	};

	struct isa_address {
		u32 a_hi;
		u32 a_lo;
	};

	struct isa_range {
		struct isa_address isa_addr;
		struct pci_address pci_addr;
		unsigned int size;
	};

	const struct isa_range *range;
	unsigned long pci_addr;
	unsigned int isa_addr;
	unsigned int size;
	int rlen = 0;

	range = of_get_property(isa_node, "ranges", &rlen);
	if (range == NULL || (rlen < sizeof(struct isa_range)))
		goto inval_range;

	/* From "ISA Binding to 1275"
	 * The ranges property is laid out as an array of elements,
	 * each of which comprises:
	 *   cells 0 - 1:	an ISA address
	 *   cells 2 - 4:	a PCI address
	 *			(size depending on dev->n_addr_cells)
	 *   cell 5:		the size of the range
	 *
	 * Note: the space selector must be tested with bitwise '&';
	 * the previous code used '&&', which accepted any non-zero
	 * a_hi value regardless of the IO space bit.
	 */
	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
		range++;
		rlen -= sizeof(struct isa_range);
		if (rlen < sizeof(struct isa_range))
			goto inval_range;
	}
	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
		goto inval_range;

	isa_addr = range->isa_addr.a_lo;
	pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
		range->pci_addr.a_lo;

	/* Assume these are both zero. Note: We could fix that and
	 * do a proper parsing instead ... oh well, that will do for
	 * now as nobody uses fancy mappings for ISA bridges
	 */
	if ((pci_addr != 0) || (isa_addr != 0)) {
		printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
		       __FUNCTION__);
		return;
	}

	/* Align size and make sure it's cropped to 64K */
	size = PAGE_ALIGN(range->size);
	if (size > 0x10000)
		size = 0x10000;

	/* Success path: map exactly "size" bytes. (The previous code
	 * erroneously printed the "no ISA IO ranges" fallback message
	 * here as well.)
	 */
	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
		     size, _PAGE_NO_CACHE|_PAGE_GUARDED);
	return;

inval_range:
	printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
	       "mapping 64k\n");
	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
		     0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
}
/**
 * isa_bridge_find_early - Find and map the ISA IO space early before
 *                         main PCI discovery. This is optionally called by
 *                         the arch code when adding PCI PHBs to get early
 *                         access to ISA IO ports
 */
void __init isa_bridge_find_early(struct pci_controller *hose)
{
	struct device_node *np, *parent = NULL, *tmp;

	/* If we already have an ISA bridge, bail off */
	if (isa_bridge_devnode != NULL)
		return;

	/* For each "isa" node in the system. Note : we do a search by
	 * type and not by name. It might be better to do by name but that's
	 * what the code used to do and I don't want to break too much at
	 * once. We can look into changing that separately
	 */
	for_each_node_by_type(np, "isa") {
		/* Look for our hose being a parent */
		for (parent = of_get_parent(np); parent;) {
			if (parent == hose->arch_data) {
				/* Match: drop the reference taken by
				 * of_get_parent() and stop climbing */
				of_node_put(parent);
				break;
			}
			/* Climb one level, releasing the previous node */
			tmp = parent;
			parent = of_get_parent(parent);
			of_node_put(tmp);
		}
		/* Non-NULL parent here means the walk matched this hose */
		if (parent != NULL)
			break;
	}
	if (np == NULL)
		return;

	/* np still holds the reference from for_each_node_by_type(),
	 * which we keep for the lifetime of the cached bridge node */
	isa_bridge_devnode = np;

	/* Now parse the "ranges" property and setup the ISA mapping */
	pci_process_ISA_OF_ranges(np, hose->io_base_phys);

	/* Set the global ISA io base to indicate we have an ISA bridge */
	isa_io_base = ISA_IO_BASE;

	pr_debug("ISA bridge (early) is %s\n", np->full_name);
}
/**
 * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
 *                        a new ISA bridge
 */
static void __devinit isa_bridge_find_late(struct pci_dev *pdev,
					   struct device_node *devnode)
{
	struct pci_controller *phb = pci_bus_to_host(pdev->bus);

	/* Remember which PCI device and OF node back the ISA bridge */
	isa_bridge_pcidev = pdev;
	isa_bridge_devnode = of_node_get(devnode);

	/* Parse "ranges" and establish the fixed ISA IO mapping */
	pci_process_ISA_OF_ranges(devnode, phb->io_base_phys);

	/* A non-zero base tells the rest of the kernel ISA is present */
	isa_io_base = ISA_IO_BASE;

	pr_debug("ISA bridge (late) is %s on %s\n",
		 devnode->full_name, pci_name(pdev));
}
/**
 * isa_bridge_remove - Remove/unmap an ISA bridge
 */
static void isa_bridge_remove(void)
{
	pr_debug("ISA bridge removed !\n");

	/* Clear the global ISA io base to indicate that we have no more
	 * ISA bridge. Note that drivers don't quite handle that, though
	 * we should probably do something about it. But do we ever really
	 * have ISA bridges being removed on machines using legacy devices ?
	 */
	/* NOTE(review): despite the comment above, this assigns
	 * ISA_IO_BASE rather than 0, while isa_io_base is documented as
	 * "NULL if no ISA bus" at its definition — confirm intent.
	 */
	isa_io_base = ISA_IO_BASE;

	/* Clear references to the bridge */
	of_node_put(isa_bridge_devnode);
	isa_bridge_devnode = NULL;
	isa_bridge_pcidev = NULL;

	/* Unmap the ISA area */
	__iounmap_at((void *)ISA_IO_BASE, 0x10000);
}
/**
 * isa_bridge_notify - Get notified of PCI devices addition/removal
 */
static int __devinit isa_bridge_notify(struct notifier_block *nb,
				       unsigned long action, void *data)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct device_node *devnode = pci_device_to_OF_node(pdev);

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		/* A bridge found early by OF node may only now get its
		 * PCI device: attach it */
		if (isa_bridge_devnode && isa_bridge_devnode == devnode &&
		    !isa_bridge_pcidev) {
			pr_debug("ISA bridge PCI attached: %s\n",
				 pci_name(pdev));
			isa_bridge_pcidev = pdev;
		}
		/* No bridge known yet and this device's OF node is of
		 * type "isa": adopt it as our ISA bridge */
		if (!isa_bridge_devnode && devnode && devnode->type &&
		    !strcmp(devnode->type, "isa"))
			isa_bridge_find_late(pdev, devnode);
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		/* Our bridge going away: tear the mapping down */
		if (pdev == isa_bridge_pcidev ||
		    (devnode && devnode == isa_bridge_devnode))
			isa_bridge_remove();
	}
	return 0;
}
/* Hooks isa_bridge_notify() into PCI bus add/remove device events */
static struct notifier_block isa_bridge_notifier = {
	.notifier_call = isa_bridge_notify
};
/**
 * isa_bridge_init - register to be notified of ISA bridge addition/removal
 *
 */
static int __init isa_bridge_init(void)
{
	/* Skip registration on legacy iSeries firmware */
	if (!firmware_has_feature(FW_FEATURE_ISERIES))
		bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
	return 0;
}
arch_initcall(isa_bridge_init);
......@@ -427,14 +427,6 @@ static int __devinit of_pci_phb_probe(struct of_device *dev,
/* Process "ranges" property */
pci_process_bridge_OF_ranges(phb, dev->node, 0);
/* Setup IO space. We use the non-dynamic version of that code here,
* which doesn't quite support unplugging. Next kernel release will
* have a better fix for this.
* Note also that we don't do ISA, this will also be fixed with a
* more massive rework.
*/
pci_setup_phb_io(phb, pci_io_base == 0);
/* Init pci_dn data structures */
pci_devs_phb_init_dynamic(phb);
......
This diff is collapsed.
......@@ -278,10 +278,8 @@ void __init find_and_init_phbs(void)
{
struct device_node *node;
struct pci_controller *phb;
unsigned int index;
struct device_node *root = of_find_node_by_path("/");
index = 0;
for (node = of_get_next_child(root, NULL);
node != NULL;
node = of_get_next_child(root, node)) {
......@@ -295,8 +293,7 @@ void __init find_and_init_phbs(void)
continue;
rtas_setup_phb(phb);
pci_process_bridge_OF_ranges(phb, node, 0);
pci_setup_phb_io(phb, index == 0);
index++;
isa_bridge_find_early(phb);
}
of_node_put(root);
......@@ -335,7 +332,7 @@ int pcibios_remove_root_bus(struct pci_controller *phb)
return 1;
}
rc = unmap_bus_range(b);
rc = pcibios_unmap_io_space(b);
if (rc) {
printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
__FUNCTION__, b->name);
......
......@@ -11,8 +11,7 @@ obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o
hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \
hash_utils_64.o hash_low_64.o tlb_64.o \
slb_low.o slb.o stab.o mmap.o imalloc.o \
$(hash-y)
slb_low.o slb.o stab.o mmap.o $(hash-y)
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o
obj-$(CONFIG_40x) += 4xx_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
......
/*
* c 2001 PPC 64 Team, IBM Corp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/mutex.h>
#include <asm/cacheflush.h>
#include "mmu_decl.h"
/* Serializes all reads/writes of the imlist below */
static DEFINE_MUTEX(imlist_mutex);
/* Singly-linked list of imalloc regions, kept sorted by address */
struct vm_struct * imlist = NULL;
/* Find a free virtual address range of "size" bytes in the imalloc
 * window [ioremap_bot, IMALLOC_END), scanning the address-sorted
 * imlist for the first gap that fits. Returns 0 and stores the start
 * address in *im_addr on success, 1 if the window is exhausted.
 */
static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
{
	unsigned long addr;
	struct vm_struct **p, *tmp;

	addr = ioremap_bot;
	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
		/* Gap before this region is large enough: take it */
		if (size + addr < (unsigned long) tmp->addr)
			break;
		/* Otherwise the candidate moves past this region */
		if ((unsigned long)tmp->addr >= ioremap_bot)
			addr = tmp->size + (unsigned long) tmp->addr;
		if (addr >= IMALLOC_END-size)
			return 1;
	}
	/* The old code only range-checked inside the loop, so a candidate
	 * placed after the last region (or with an empty list) was never
	 * validated against the end of the window — check it here too.
	 */
	if (addr >= IMALLOC_END - size)
		return 1;
	*im_addr = addr;
	return 0;
}
/* Return whether the region described by v_addr and size is a strict
 * subset of the region described by parent. An exactly-equal region is
 * not a subset (callers report that as IM_REGION_EXISTS instead).
 */
static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
			struct vm_struct *parent)
{
	/* The child must start inside the parent AND end within it.
	 * The previous test only required size < parent->size, which
	 * wrongly classified a region starting inside the parent but
	 * extending past its end as a subset.
	 */
	return (int) (v_addr >= (unsigned long) parent->addr &&
		      v_addr < (unsigned long) parent->addr + parent->size &&
		      v_addr + size <=
			(unsigned long) parent->addr + parent->size &&
		      size < parent->size);
}
/* Return whether the region described by v_addr and size is a superset
* of the region described by child
*/
static int im_region_is_superset(unsigned long v_addr, unsigned long size,
struct vm_struct *child)
{
struct vm_struct parent;
parent.addr = (void *) v_addr;
parent.size = size;
return im_region_is_subset((unsigned long) child->addr, child->size,
&parent);
}
/* Return whether the region described by v_addr and size overlaps
 * the region described by vm. Overlapping regions meet the
 * following conditions:
 * 1) The regions share some part of the address space
 * 2) The regions aren't identical
 * 3) Neither region is a subset of the other
 */
static int im_region_overlaps(unsigned long v_addr, unsigned long size,
			struct vm_struct *vm)
{
	/* Containing vm entirely is a superset, not an overlap (rule 3) */
	if (im_region_is_superset(v_addr, size, vm))
		return 0;

	/* Either we end past vm but start before vm's end, or we start
	 * below vm and end somewhere past vm's start — a partial
	 * intersection in both cases.
	 */
	return (v_addr + size > (unsigned long) vm->addr + vm->size &&
		v_addr < (unsigned long) vm->addr + vm->size) ||
	       (v_addr < (unsigned long) vm->addr &&
		v_addr + size > (unsigned long) vm->addr);
}
/* Determine imalloc status of region described by v_addr and size.
* Can return one of the following:
* IM_REGION_UNUSED - Entire region is unallocated in imalloc space.
* IM_REGION_SUBSET - Region is a subset of a region that is already
* allocated in imalloc space.
* vm will be assigned to a ptr to the parent region.
* IM_REGION_EXISTS - Exact region already allocated in imalloc space.
* vm will be assigned to a ptr to the existing imlist
* member.
* IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space.
* IM_REGION_SUPERSET - Region is a superset of a region that is already
* allocated in imalloc space.
*/
static int im_region_status(unsigned long v_addr, unsigned long size,
			struct vm_struct **vm)
{
	struct vm_struct *tmp;

	/* imlist is sorted by address: find the first region whose end
	 * lies past our start — the only candidate that can relate to
	 * the requested range.
	 */
	for (tmp = imlist; tmp; tmp = tmp->next)
		if (v_addr < (unsigned long) tmp->addr + tmp->size)
			break;

	*vm = NULL;
	if (tmp) {
		if (im_region_overlaps(v_addr, size, tmp))
			return IM_REGION_OVERLAP;

		*vm = tmp;
		if (im_region_is_subset(v_addr, size, tmp)) {
			/* Return with tmp pointing to superset */
			return IM_REGION_SUBSET;
		}
		if (im_region_is_superset(v_addr, size, tmp)) {
			/* Return with tmp pointing to first subset */
			return IM_REGION_SUPERSET;
		}
		else if (v_addr == (unsigned long) tmp->addr &&
			 size == tmp->size) {
			/* Return with tmp pointing to exact region */
			return IM_REGION_EXISTS;
		}
	}
	return IM_REGION_UNUSED;
}
/* Carve the exact region [v_addr, v_addr+size) out of "parent", which
 * is known to strictly contain it. The list stays sorted by address;
 * new vm_structs are allocated for the pieces as needed. Returns the
 * vm_struct representing the carved-out child, or NULL on allocation
 * failure (in which case the list is left untouched).
 */
static struct vm_struct * split_im_region(unsigned long v_addr,
		unsigned long size, struct vm_struct *parent)
{
	struct vm_struct *vm1 = NULL;
	struct vm_struct *vm2 = NULL;
	struct vm_struct *new_vm = NULL;

	vm1 = kmalloc(sizeof(*vm1), GFP_KERNEL);
	if (vm1 == NULL) {
		printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
		return NULL;
	}

	if (v_addr == (unsigned long) parent->addr) {
		/* Use existing parent vm_struct to represent child, allocate
		 * new one for the remainder of parent range
		 */
		vm1->size = parent->size - size;
		vm1->addr = (void *) (v_addr + size);
		vm1->next = parent->next;

		parent->size = size;
		parent->next = vm1;
		new_vm = parent;
	} else if (v_addr + size == (unsigned long) parent->addr +
			parent->size) {
		/* Allocate new vm_struct to represent child, use existing
		 * parent one for remainder of parent range
		 */
		vm1->size = size;
		vm1->addr = (void *) v_addr;
		vm1->next = parent->next;
		new_vm = vm1;

		parent->size -= size;
		parent->next = vm1;
	} else {
		/* Allocate two new vm_structs for the new child and
		 * uppermost remainder, and use existing parent one for the
		 * lower remainder of parent range
		 */
		vm2 = kmalloc(sizeof(*vm2), GFP_KERNEL);
		if (vm2 == NULL) {
			printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
			kfree(vm1);
			return NULL;
		}

		vm1->size = size;
		vm1->addr = (void *) v_addr;
		vm1->next = vm2;
		new_vm = vm1;

		vm2->size = ((unsigned long) parent->addr + parent->size) -
			(v_addr + size);
		vm2->addr = (void *) v_addr + size;
		vm2->next = parent->next;

		parent->size = v_addr - (unsigned long) parent->addr;
		parent->next = vm1;
	}

	return new_vm;
}
/* Allocate a vm_struct for [req_addr, req_addr+size) and splice it
 * into imlist at the position that keeps the list sorted by address.
 * Returns NULL on allocation failure.
 */
static struct vm_struct * __add_new_im_area(unsigned long req_addr,
			       unsigned long size)
{
	struct vm_struct **link, *cur, *area;

	/* Find the first region starting at or after the new one's end */
	link = &imlist;
	while ((cur = *link) != NULL &&
	       req_addr + size > (unsigned long)cur->addr)
		link = &cur->next;

	area = kmalloc(sizeof(*area), GFP_KERNEL);
	if (!area)
		return NULL;

	area->flags = 0;
	area->addr = (void *)req_addr;
	area->size = size;

	/* Splice in ahead of "cur", preserving the sort order */
	area->next = *link;
	*link = area;

	return area;
}
/* Look up (and possibly create or split) the imalloc region covering
 * [req_addr, req_addr+size). "criteria" is a bitmask of IM_REGION_*
 * states the caller is willing to accept; any other state returns
 * NULL. Caller must hold imlist_mutex.
 */
static struct vm_struct * __im_get_area(unsigned long req_addr,
					unsigned long size,
					int criteria)
{
	struct vm_struct *tmp;
	int status;

	status = im_region_status(req_addr, size, &tmp);
	/* Reject states the caller did not ask for */
	if ((criteria & status) == 0) {
		return NULL;
	}

	switch (status) {
	case IM_REGION_UNUSED:
		/* Fresh range: record it in the list */
		tmp = __add_new_im_area(req_addr, size);
		break;
	case IM_REGION_SUBSET:
		/* Inside an existing region: carve out the exact range */
		tmp = split_im_region(req_addr, size, tmp);
		break;
	case IM_REGION_EXISTS:
		/* Return requested region */
		break;
	case IM_REGION_SUPERSET:
		/* Return first existing subset of requested region */
		break;
	default:
		printk(KERN_ERR "%s() unexpected imalloc region status\n",
		       __FUNCTION__);
		tmp = NULL;
	}

	return tmp;
}
/* Allocate a fresh region of "size" bytes in the imalloc window and
 * return its vm_struct, or NULL if no address range is available.
 */
struct vm_struct * im_get_free_area(unsigned long size)
{
	struct vm_struct *area = NULL;
	unsigned long addr;

	mutex_lock(&imlist_mutex);
	if (get_free_im_addr(size, &addr)) {
		printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
		       __FUNCTION__, size);
	} else {
		area = __im_get_area(addr, size, IM_REGION_UNUSED);
		if (area == NULL)
			printk(KERN_ERR
			       "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
			       __FUNCTION__, addr, size);
	}
	mutex_unlock(&imlist_mutex);

	return area;
}
/* Locked wrapper around __im_get_area(): look up/create the region
 * [v_addr, v_addr+size) subject to the IM_REGION_* criteria mask.
 */
struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
		int criteria)
{
	struct vm_struct *found;

	mutex_lock(&imlist_mutex);
	found = __im_get_area(v_addr, size, criteria);
	mutex_unlock(&imlist_mutex);

	return found;
}
/* Remove the region starting exactly at "addr" from the imalloc list
 * and tear down its kernel mapping. addr must be page-aligned; a NULL
 * addr is a no-op, and an unknown or misaligned addr only logs an
 * error.
 */
void im_free(void * addr)
{
	struct vm_struct **p, *tmp;

	if (!addr)
		return;
	if ((unsigned long) addr & ~PAGE_MASK) {
		printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr);
		return;
	}

	mutex_lock(&imlist_mutex);
	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
		if (tmp->addr == addr) {
			/* Unlink first, then unmap the page tables */
			*p = tmp->next;
			unmap_kernel_range((unsigned long)tmp->addr,
					   tmp->size);
			kfree(tmp);
			mutex_unlock(&imlist_mutex);
			return;
		}
	}
	mutex_unlock(&imlist_mutex);
	printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
	       addr);
}
......@@ -90,16 +90,4 @@ static inline void flush_HPTE(unsigned context, unsigned long va,
else
_tlbie(va);
}
#else /* CONFIG_PPC64 */
/* imalloc region types */
#define IM_REGION_UNUSED 0x1
#define IM_REGION_SUBSET 0x2
#define IM_REGION_EXISTS 0x4
#define IM_REGION_OVERLAP 0x8
#define IM_REGION_SUPERSET 0x10
extern struct vm_struct * im_get_free_area(unsigned long size);
extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
int region_type);
extern void im_free(void *addr);
#endif
......@@ -34,41 +34,27 @@
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
#include <linux/module.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/lmb.h>
#include <asm/rtas.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/eeh.h>
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
#include <asm/abs_addr.h>
#include <asm/vdso.h>
#include <asm/firmware.h>
#include "mmu_decl.h"
unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE;
unsigned long ioremap_bot = IOREMAP_BASE;
/*
* map_io_page currently only called by __ioremap
......@@ -102,8 +88,8 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
* entry in the hardware page table.
*
*/
if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
mmu_io_psize)) {
if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE,
pa, flags, mmu_io_psize)) {
printk(KERN_ERR "Failed to do bolted mapping IO "
"memory at %016lx !\n", pa);
return -ENOMEM;
......@@ -113,8 +99,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
}
static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
unsigned long ea, unsigned long size,
/**
* __ioremap_at - Low level function to establish the page tables
* for an IO mapping
*/
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
unsigned long flags)
{
unsigned long i;
......@@ -122,17 +111,35 @@ static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
if ((flags & _PAGE_PRESENT) == 0)
flags |= pgprot_val(PAGE_KERNEL);
WARN_ON(pa & ~PAGE_MASK);
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
WARN_ON(size & ~PAGE_MASK);
for (i = 0; i < size; i += PAGE_SIZE)
if (map_io_page(ea+i, pa+i, flags))
if (map_io_page((unsigned long)ea+i, pa+i, flags))
return NULL;
return (void __iomem *) (ea + (addr & ~PAGE_MASK));
return (void __iomem *)ea;
}
/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	/* Both the virtual address and the size must be page aligned */
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}
void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
unsigned long pa, ea;
phys_addr_t paligned;
void __iomem *ret;
/*
......@@ -144,27 +151,30 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
* IMALLOC_END
*
*/
pa = addr & PAGE_MASK;
size = PAGE_ALIGN(addr + size) - pa;
paligned = addr & PAGE_MASK;
size = PAGE_ALIGN(addr + size) - paligned;
if ((size == 0) || (pa == 0))
if ((size == 0) || (paligned == 0))
return NULL;
if (mem_init_done) {
struct vm_struct *area;
area = im_get_free_area(size);
area = __get_vm_area(size, VM_IOREMAP,
ioremap_bot, IOREMAP_END);
if (area == NULL)
return NULL;
ea = (unsigned long)(area->addr);
ret = __ioremap_com(addr, pa, ea, size, flags);
ret = __ioremap_at(paligned, area->addr, size, flags);
if (!ret)
im_free(area->addr);
vunmap(area->addr);
} else {
ea = ioremap_bot;
ret = __ioremap_com(addr, pa, ea, size, flags);
ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
if (ret)
ioremap_bot += size;
}
if (ret)
ret += addr & ~PAGE_MASK;
return ret;
}
......@@ -187,61 +197,9 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
}
#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
/* Map the physical range [pa, pa+size) at the caller-chosen virtual
 * address "ea". All of pa, ea and size must be page aligned. Before
 * mem_init_done the mapping is made without any bookkeeping (so it
 * cannot easily be unmapped later). Returns 0 on success, 1 on any
 * failure.
 */
int __ioremap_explicit(phys_addr_t pa, unsigned long ea,
		       unsigned long size, unsigned long flags)
{
	struct vm_struct *area;
	void __iomem *ret;

	/* For now, require page-aligned values for pa, ea, and size */
	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
	    !IS_PAGE_ALIGNED(size)) {
		printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
		return 1;
	}

	if (!mem_init_done) {
		/* Two things to consider in this case:
		 * 1) No records will be kept (imalloc, etc) that the region
		 *    has been remapped
		 * 2) It won't be easy to iounmap() the region later (because
		 *    of 1)
		 */
		;
	} else {
		/* Reserve (or reuse) the exact range in the imalloc list */
		area = im_get_area(ea, size,
			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
		if (area == NULL) {
			/* Expected when PHB-dlpar is in play */
			return 1;
		}
		if (ea != (unsigned long) area->addr) {
			printk(KERN_ERR "unexpected addr return from "
			       "im_get_area\n");
			return 1;
		}
	}

	/* Establish the page tables; the result must land exactly at ea */
	ret = __ioremap_com(pa, pa, ea, size, flags);
	if (ret == NULL) {
		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
		return 1;
	}
	if (ret != (void *) ea) {
		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
		return 1;
	}

	return 0;
}
/*
* Unmap an IO region and remove it from imalloc'd list.
* Access to IO memory should be serialized by driver.
*
* XXX what about calls before mem_init_done (ie python_countermeasures())
*/
void __iounmap(volatile void __iomem *token)
{
......@@ -250,9 +208,14 @@ void __iounmap(volatile void __iomem *token)
if (!mem_init_done)
return;
addr = (void *) ((unsigned long __force) token & PAGE_MASK);
im_free(addr);
addr = (void *) ((unsigned long __force)
PCI_FIX_ADDR(token) & PAGE_MASK);
if ((unsigned long)addr < ioremap_bot) {
printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
" at 0x%p\n", addr);
return;
}
vunmap(addr);
}
void iounmap(volatile void __iomem *token)
......@@ -263,77 +226,8 @@ void iounmap(volatile void __iomem *token)
__iounmap(token);
}
/* Unmap every allocated region that lies wholly inside
 * [addr, addr+size). Returns 1 if no such region existed, 0 once all
 * contained regions have been unmapped.
 */
static int iounmap_subset_regions(unsigned long addr, unsigned long size)
{
	struct vm_struct *sub;

	/* Bail out if no allocated region is contained in the range */
	sub = im_get_area(addr, size, IM_REGION_SUPERSET);
	if (sub == NULL)
		return 1;

	/* Unmap each contained region; each lookup finds the next one */
	do {
		iounmap((void __iomem *) sub->addr);
		sub = im_get_area(addr, size, IM_REGION_SUPERSET);
	} while (sub);

	return 0;
}
/* Unmap the range [start, start+size) that was mapped with
 * __ioremap_explicit(). Handles three cases: the range matches an
 * existing region exactly, is a subset of one (which gets split), or
 * contains several smaller regions (each unmapped in turn). Returns
 * 0 on success, 1 if no matching region was found.
 */
int __iounmap_explicit(volatile void __iomem *start, unsigned long size)
{
	struct vm_struct *area;
	unsigned long addr;
	int rc;

	addr = (unsigned long __force) start & PAGE_MASK;

	/* Verify that the region either exists or is a subset of an existing
	 * region. In the latter case, split the parent region to create
	 * the exact region
	 */
	area = im_get_area(addr, size,
			IM_REGION_EXISTS | IM_REGION_SUBSET);
	if (area == NULL) {
		/* Determine whether subset regions exist. If so, unmap */
		rc = iounmap_subset_regions(addr, size);
		if (rc) {
			printk(KERN_ERR
			       "%s() cannot unmap nonexistent range 0x%lx\n",
				__FUNCTION__, addr);
			return 1;
		}
	} else {
		iounmap((void __iomem *) area->addr);
	}
	/*
	 * FIXME! This can't be right:
	iounmap(area->addr);
	 * Maybe it should be "iounmap(area);"
	 */

	return 0;
}
EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_flags);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
static DEFINE_SPINLOCK(phb_io_lock);

/* Carve "size" bytes out of the fixed PHB IO window and return the
 * virtual address of the reservation. Panics if the window
 * [PHBS_IO_BASE, IMALLOC_BASE) would be exceeded.
 */
void __iomem * reserve_phb_iospace(unsigned long size)
{
	void __iomem *virt_addr;

	spin_lock(&phb_io_lock);
	/* Check under the lock — the old code tested before taking it,
	 * so two concurrent callers could both pass — and account for
	 * this reservation's size, not just the current bottom.
	 */
	if (phbs_io_bot + size > IMALLOC_BASE)
		panic("reserve_phb_iospace(): phb io space overflow\n");
	virt_addr = (void __iomem *) phbs_io_bot;
	phbs_io_bot += size;
	spin_unlock(&phb_io_lock);

	return virt_addr;
}
......@@ -239,3 +239,59 @@ void pte_free_finish(void)
pte_free_submit(*batchp);
*batchp = NULL;
}
/**
 * __flush_hash_table_range - Flush all HPTEs for a given address range
 *                            from the hash table (and the TLB). But keeps
 *                            the linux PTEs intact.
 *
 * @mm		: mm_struct of the target address space (generally init_mm)
 * @start	: starting address
 * @end		: ending address (not included in the flush)
 *
 * This function is mostly to be used by some IO hotplug code in order
 * to remove all hash entries from a given address range used to map IO
 * space on a removed PCI-PCI bridge without tearing down the full mapping
 * since 64K pages may overlap with other bridges when using 64K pages
 * with 4K HW pages on IO space.
 *
 * Because of that usage pattern, it's only available with CONFIG_HOTPLUG
 * and is implemented for small size rather than speed.
 */
#ifdef CONFIG_HOTPLUG

void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
			      unsigned long end)
{
	unsigned long flags;

	/* Work on whole pages only */
	start = _ALIGN_DOWN(start, PAGE_SIZE);
	end = _ALIGN_UP(end, PAGE_SIZE);

	BUG_ON(!mm->pgd);

	/* Note: Normally, we should only ever use a batch within a
	 * PTE locked section. This violates the rule, but will work
	 * since we don't actually modify the PTEs, we just flush the
	 * hash while leaving the PTEs intact (including their reference
	 * to being hashed). This is not the most performance oriented
	 * way to do things but is fine for our needs here.
	 */
	local_irq_save(flags);
	arch_enter_lazy_mmu_mode();
	for (; start < end; start += PAGE_SIZE) {
		pte_t *ptep = find_linux_pte(mm->pgd, start);
		unsigned long pte;

		if (ptep == NULL)
			continue;
		pte = pte_val(*ptep);
		/* Only pages that actually have a hash entry need flushing */
		if (!(pte & _PAGE_HASHPTE))
			continue;
		hpte_need_flush(mm, start, ptep, pte, 0);
	}
	arch_leave_lazy_mmu_mode();
	local_irq_restore(flags);
}
#endif /* CONFIG_HOTPLUG */
......@@ -102,7 +102,7 @@ static void spider_io_flush(const volatile void __iomem *addr)
vaddr = (unsigned long)PCI_FIX_ADDR(addr);
/* Check if it's in allowed range for PIO */
if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE)
if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END)
return;
/* Try to find a PTE. If not, clear the paddr, we'll do
......
......@@ -742,6 +742,11 @@ void __init iSeries_pcibios_init(void)
/* Install IO hooks */
ppc_pci_io = iseries_pci_io;
/* iSeries has no IO space in the common sense, it needs to set
* the IO base to 0
*/
pci_io_base = 0;
if (root == NULL) {
printk(KERN_CRIT "iSeries_pcibios_init: can't find root "
"of device tree\n");
......
......@@ -519,23 +519,6 @@ void __devinit maple_pci_irq_fixup(struct pci_dev *dev)
DBG(" <- maple_pci_irq_fixup\n");
}
/* Shift each host bridge's IO resource window by the offset between
 * its virtual IO base and the global pci_io_base, and log the result.
 */
static void __init maple_fixup_phb_resources(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;

		hose->io_resource.start += offset;
		hose->io_resource.end += offset;

		printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n",
		       hose->global_number,
		       (unsigned long long)hose->io_resource.start,
		       (unsigned long long)hose->io_resource.end);
	}
}
void __init maple_pci_init(void)
{
struct device_node *np, *root;
......@@ -573,24 +556,6 @@ void __init maple_pci_init(void)
if (ht && add_bridge(ht) != 0)
of_node_put(ht);
/*
* We need to call pci_setup_phb_io for the HT bridge first
* so it gets the I/O port numbers starting at 0, and we
* need to call it for the AGP bridge after that so it gets
* small positive I/O port numbers.
*/
if (u3_ht)
pci_setup_phb_io(u3_ht, 1);
if (u3_agp)
pci_setup_phb_io(u3_agp, 0);
if (u4_pcie)
pci_setup_phb_io(u4_pcie, 0);
/* Fixup the IO resources on our host bridges as the common code
* does it only for childs of the host bridges
*/
maple_fixup_phb_resources();
/* Setup the linkage between OF nodes and PHBs */
pci_devs_phb_init();
......
......@@ -150,29 +150,11 @@ static int __init add_bridge(struct device_node *dev)
printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
/* Interpret the "ranges" property */
/* This also maps the I/O region and sets isa_io/mem_base */
pci_process_bridge_OF_ranges(hose, dev, 1);
pci_setup_phb_io(hose, 1);
return 0;
}
/* Shift each host bridge's IO resource window by the offset between
 * its virtual IO base and the global pci_io_base, and log the result.
 */
static void __init pas_fixup_phb_resources(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;

		hose->io_resource.start += offset;
		hose->io_resource.end += offset;

		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
		       hose->global_number,
		       hose->io_resource.start, hose->io_resource.end);
	}
}
void __init pas_pci_init(void)
{
struct device_node *np, *root;
......@@ -190,8 +172,6 @@ void __init pas_pci_init(void)
of_node_put(root);
pas_fixup_phb_resources();
/* Setup the linkage between OF nodes and PHBs */
pci_devs_phb_init();
......
......@@ -1006,19 +1006,6 @@ void __devinit pmac_pci_irq_fixup(struct pci_dev *dev)
#endif /* CONFIG_PPC32 */
}
#ifdef CONFIG_PPC64
/*
 * Report the final IO resource window of each PCI host bridge.
 * Unlike the maple/pasemi variants, this one does not adjust the
 * resources — it only logs them.
 */
static void __init pmac_fixup_phb_resources(void)
{
	struct pci_controller *hose, *tmp;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		/* resource_size_t may be 32 or 64 bits depending on config;
		 * cast to unsigned long long for a stable printk format
		 * (matches the maple platform code, which uses %llx).
		 */
		printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n",
		       hose->global_number,
		       (unsigned long long)hose->io_resource.start,
		       (unsigned long long)hose->io_resource.end);
	}
}
#endif
void __init pmac_pci_init(void)
{
struct device_node *np, *root;
......@@ -1053,25 +1040,6 @@ void __init pmac_pci_init(void)
if (ht && add_bridge(ht) != 0)
of_node_put(ht);
/*
* We need to call pci_setup_phb_io for the HT bridge first
* so it gets the I/O port numbers starting at 0, and we
* need to call it for the AGP bridge after that so it gets
* small positive I/O port numbers.
*/
if (u3_ht)
pci_setup_phb_io(u3_ht, 1);
if (u3_agp)
pci_setup_phb_io(u3_agp, 0);
if (u4_pcie)
pci_setup_phb_io(u4_pcie, 0);
/*
* On ppc64, fixup the IO resources on our host bridges as
* the common code does it only for children of the host bridges
*/
pmac_fixup_phb_resources();
/* Setup the linkage between OF nodes and PHBs */
pci_devs_phb_init();
......
......@@ -202,8 +202,6 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
rtas_setup_phb(phb);
pci_process_bridge_OF_ranges(phb, dn, 0);
pci_setup_phb_io_dynamic(phb, primary);
pci_devs_phb_init_dynamic(phb);
if (dn->child)
......
......@@ -33,6 +33,8 @@ static inline void setup_kexec_cpu_down_xics(void) { }
static inline void setup_kexec_cpu_down_mpic(void) { }
#endif
extern void pSeries_final_fixup(void);
/* Poweron flag used for enabling auto ups restart */
extern unsigned long rtas_poweron_auto;
......
......@@ -159,8 +159,8 @@ static void dlpar_pci_add_bus(struct device_node *dn)
/* Claim new bus resources */
pcibios_claim_one_bus(dev->bus);
/* ioremap() for child bus, which may or may not succeed */
remap_bus_range(dev->subordinate);
/* Map IO space for child bus, which may or may not succeed */
pcibios_map_io_space(dev->subordinate);
/* Add new devices to global lists. Register in proc, sysfs. */
pci_bus_add_devices(phb->bus);
......@@ -390,7 +390,7 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
} else
pcibios_remove_pci_devices(bus);
if (unmap_bus_range(bus)) {
if (pcibios_unmap_io_space(bus)) {
printk(KERN_ERR "%s: failed to unmap bus range\n",
__FUNCTION__);
return -ERANGE;
......
......@@ -29,7 +29,7 @@
#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL);
#include <linux/pci.h>
#include <asm/ppc-pci.h> /* for ppc64_isabridge_dev */
#include <asm/ppc-pci.h> /* for isa_bridge_pcidev */
#define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io)
......@@ -139,12 +139,12 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
if (bus_addr
&& (addr != prev_addr || size != prev_size || dir != prev_dir)) {
/* different from last time -- unmap prev */
pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir);
pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir);
bus_addr = 0;
}
if (!bus_addr) /* need to map it */
bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir);
bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir);
/* remember this one as prev */
prev_addr = addr;
......
......@@ -607,9 +607,9 @@ static inline void iosync(void)
*
* * iounmap undoes such a mapping and can be hooked
*
* * __ioremap_explicit (and the pending __iounmap_explicit) are low level
* functions to create hand-made mappings for use only by the PCI code
* and cannot currently be hooked.
* * __ioremap_at (and the pending __iounmap_at) are low level functions to
* create hand-made mappings for use only by the PCI code and cannot
* currently be hooked. Must be page aligned.
*
* * __ioremap is the low level implementation used by ioremap and
* ioremap_flags and cannot be hooked (but can be used by a hook on one
......@@ -629,12 +629,9 @@ extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
unsigned long flags);
extern void __iounmap(volatile void __iomem *addr);
extern int __ioremap_explicit(phys_addr_t p_addr, unsigned long v_addr,
unsigned long size, unsigned long flags);
extern int __iounmap_explicit(volatile void __iomem *start,
unsigned long size);
extern void __iomem * reserve_phb_iospace(unsigned long size);
extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
unsigned long size, unsigned long flags);
extern void __iounmap_at(void *ea, unsigned long size);
/* Those are more 32 bits only functions */
extern unsigned long iopa(unsigned long addr);
......@@ -651,8 +648,8 @@ extern void io_block_mapping(unsigned long virt, phys_addr_t phys,
*/
#define HAVE_ARCH_PIO_SIZE 1
#define PIO_OFFSET 0x00000000UL
#define PIO_MASK 0x3fffffffUL
#define PIO_RESERVED 0x40000000UL
#define PIO_MASK (FULL_IO_SIZE - 1)
#define PIO_RESERVED (FULL_IO_SIZE)
#define mmio_read16be(addr) readw_be(addr)
#define mmio_read32be(addr) readl_be(addr)
......
......@@ -31,6 +31,7 @@ struct pci_controller {
int last_busno;
void __iomem *io_base_virt;
void *io_base_alloc;
resource_size_t io_base_phys;
/* Some machines have a non 1:1 mapping of
......@@ -167,6 +168,11 @@ static inline unsigned long pci_address_to_pio(phys_addr_t address)
}
#endif
extern void isa_bridge_find_early(struct pci_controller *hose);
extern int pcibios_unmap_io_space(struct pci_bus *bus);
extern int pcibios_map_io_space(struct pci_bus *bus);
/* Return values for ppc_md.pci_probe_mode function */
#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
......
......@@ -220,10 +220,6 @@ static inline struct resource *pcibios_select_root(struct pci_dev *pdev,
return root;
}
extern int unmap_bus_range(struct pci_bus *bus);
extern int remap_bus_range(struct pci_bus *bus);
extern void pcibios_fixup_device_resources(struct pci_dev *dev,
struct pci_bus *bus);
......
......@@ -27,7 +27,7 @@ struct mm_struct;
*/
#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
#if TASK_SIZE_USER64 > PGTABLE_RANGE
#error TASK_SIZE_USER64 exceeds pagetable range
......@@ -37,19 +37,28 @@ struct mm_struct;
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
/*
* Define the address range of the vmalloc VM area.
*/
#define VMALLOC_START ASM_CONST(0xD000000000000000)
#define VMALLOC_SIZE ASM_CONST(0x80000000000)
#define VMALLOC_SIZE (PGTABLE_RANGE >> 1)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
/*
* Define the address range of the imalloc VM area.
* Define the address ranges for MMIO and IO space :
*
* ISA_IO_BASE = VMALLOC_END, 64K reserved area
* PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
* IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
*/
#define PHBS_IO_BASE VMALLOC_END
#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
#define FULL_IO_SIZE 0x80000000ul
#define ISA_IO_BASE (VMALLOC_END)
#define ISA_IO_END (VMALLOC_END + 0x10000ul)
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (VMALLOC_END + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
#define IOREMAP_END (VMALLOC_START + PGTABLE_RANGE)
/*
* Region IDs
......
......@@ -26,7 +26,7 @@ extern int global_phb_number;
extern void find_and_init_phbs(void);
extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */
extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */
/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
#define BUID_HI(buid) ((buid) >> 32)
......@@ -47,8 +47,8 @@ extern void init_pci_config_tokens (void);
extern unsigned long get_phb_buid (struct device_node *);
extern int rtas_setup_phb(struct pci_controller *phb);
/* From pSeries_pci.h */
extern void pSeries_final_fixup(void);
/* From iSeries PCI */
extern void iSeries_pcibios_init(void);
extern unsigned long pci_probe_only;
......
......@@ -155,6 +155,11 @@ static inline void flush_tlb_kernel_range(unsigned long start,
{
}
/* Private function for use by PCI IO mapping code */
extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
unsigned long end);
#endif
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment