diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 43329eaf2f9b86daf035038aa08aa249abbd3b61..d7788028514bc485aa020865e614b00503a54918 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -210,6 +210,11 @@ config FORCE_MAX_ZONEORDER
 	depends on SA1111
 	default "9"
 
+config DMABOUNCE
+	bool
+	depends on SA1111
+	default y
+
 source arch/arm/mm/Kconfig
 
 #  bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index f6e1f1e1d3f7896addadcd473df4634784f8b55c..f326bac694fdbd57253572f62b2ea3c337a4d74e 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -5,6 +5,7 @@
 obj-y				+= platform.o
 obj-$(CONFIG_ARM_AMBA)		+= amba.o
 obj-$(CONFIG_ICST525)		+= icst525.o
-obj-$(CONFIG_SA1111)		+= sa1111.o sa1111-pcibuf.o
+obj-$(CONFIG_SA1111)		+= sa1111.o
 obj-$(CONFIG_PCI_HOST_PLX90X0)	+= plx90x0.o
 obj-$(CONFIG_PCI_HOST_VIA82C505) += via82c505.o
+obj-$(CONFIG_DMABOUNCE)		+= dmabounce.o
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4c454edb95e85a4757b4f95ebcc4e77dd182eb8
--- /dev/null
+++ b/arch/arm/common/dmabounce.c
@@ -0,0 +1,692 @@
+/*
+ *  arch/arm/common/dmabounce.c
+ *
+ *  Special dma_{map/unmap/dma_sync}_* routines for systems that have
+ *  limited DMA windows. These functions utilize bounce buffers to
+ *  copy data to/from buffers located outside the DMA region. This
+ *  only works for systems in which DMA memory is at the bottom of
+ *  RAM, the remainder of memory is at the top, and the DMA memory
+ *  can be marked as ZONE_DMA. Anything beyond that, such as discontiguous
+ *  DMA windows will require custom implementations that reserve memory
+ *  areas at early bootup.
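+ *
+ *  Rough flow for a bounced streaming mapping (sketch of the code below):
+ *
+ *	dma_map_single()
+ *	  -> map_single(): dma_needs_bounce()?
+ *	     yes: alloc_safe_buffer(), copy into it (for DMA_TO_DEVICE),
+ *	          sync, and hand back the safe buffer's DMA address
+ *	     no:  consistent_sync(), return virt_to_bus(ptr)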
+ *
+ *  Original version by Brad Parker (brad@heeltoe.com)
+ *  Re-written by Christopher Hoover <ch@murgatroid.com>
+ *  Made generic by Deepak Saxena <dsaxena@plexity.net>
+ *
+ *  Copyright (C) 2002 Hewlett Packard Company.
+ *  Copyright (C) 2004 MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+
+#undef DEBUG
+
+#undef STATS
+#ifdef STATS
+#define DO_STATS(X) do { X ; } while (0)
+#else
+#define DO_STATS(X) do { } while (0)
+#endif
+
+/* ************************************************** */
+
+struct safe_buffer {
+	struct list_head node;
+
+	/* original request */
+	void		*ptr;
+	size_t		size;
+	int		direction;
+
+	/* safe buffer info */
+	struct dma_pool *pool;
+	void		*safe;
+	dma_addr_t	safe_dma_addr;
+};
+
+struct dmabounce_device_info {
+	struct list_head node;
+
+	struct device *dev;
+	struct dma_pool *small_buffer_pool;
+	struct dma_pool *large_buffer_pool;
+	struct list_head safe_buffers;
+	unsigned long small_buffer_size, large_buffer_size;
+#ifdef STATS
+	unsigned long sbp_allocs;
+	unsigned long lbp_allocs;
+	unsigned long total_allocs;
+	unsigned long map_op_count;
+	unsigned long bounce_count;
+#endif
+};
+
+static LIST_HEAD(dmabounce_devs);
+
+#ifdef STATS
+static void print_alloc_stats(struct dmabounce_device_info *device_info)
+{
+	printk(KERN_INFO
+		"%s: dmabounce: sbp: %lu, lbp: %lu, other: %lu, total: %lu\n",
+		device_info->dev->bus_id,
+		device_info->sbp_allocs, device_info->lbp_allocs,
+		device_info->total_allocs - device_info->sbp_allocs -
+			device_info->lbp_allocs,
+		device_info->total_allocs);
+}
+#endif
+
+/* find the given device in the dmabounce device list */
+static inline struct dmabounce_device_info *
+find_dmabounce_dev(struct device *dev)
+{
+	struct list_head *entry;
+
+	list_for_each(entry, &dmabounce_devs) {
+		struct dmabounce_device_info *d =
+			list_entry(entry, struct dmabounce_device_info, node);
+
+		if (d->dev == dev)
+			return d;
+	}
+
+	return NULL;
+}
+
+
+/* allocate a 'safe' buffer and keep track of it */
+static inline struct safe_buffer *
+alloc_safe_buffer(struct dmabounce_device_info *device_info, void *ptr,
+			size_t size, enum dma_data_direction dir)
+{
+	struct safe_buffer *buf;
+	struct dma_pool *pool;
+	struct device *dev = device_info->dev;
+	void *safe;
+	dma_addr_t safe_dma_addr;
+
+	dev_dbg(dev, "%s(ptr=%p, size=%d, dir=%d)\n",
+		__func__, ptr, size, dir);
+
+	DO_STATS ( device_info->total_allocs++ );
+
+	buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
+	if (buf == NULL) {
+		dev_warn(dev, "%s: kmalloc failed\n", __func__);
+		return NULL;
+	}
+
+	if (size <= device_info->small_buffer_size) {
+		pool = device_info->small_buffer_pool;
+		safe = dma_pool_alloc(pool, GFP_ATOMIC, &safe_dma_addr);
+
+		DO_STATS ( device_info->sbp_allocs++ );
+	} else if (size <= device_info->large_buffer_size) {
+		pool = device_info->large_buffer_pool;
+		safe = dma_pool_alloc(pool, GFP_ATOMIC, &safe_dma_addr);
+
+		DO_STATS ( device_info->lbp_allocs++ );
+	} else {
+		pool = NULL;
+		safe = dma_alloc_coherent(dev, size, &safe_dma_addr, GFP_ATOMIC);
+	}
+
+	if (safe == NULL) {
+		dev_warn(device_info->dev,
+			"%s: could not alloc dma memory (size=%d)\n",
+		       __func__, size);
+		kfree(buf);
+		return NULL;
+	}
+
+#ifdef STATS
+	if (device_info->total_allocs % 1000 == 0)
+		print_alloc_stats(device_info);
+#endif
+
+	buf->ptr = ptr;
+	buf->size = size;
+	buf->direction = dir;
+	buf->pool = pool;
+	buf->safe = safe;
+	buf->safe_dma_addr = safe_dma_addr;
+
+	list_add(&buf->node, &device_info->safe_buffers);
+
+	return buf;
+}
+
+/* determine if a buffer is from our "safe" pool */
+static inline struct safe_buffer *
+find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_addr)
+{
+	struct list_head *entry;
+
+	list_for_each(entry, &device_info->safe_buffers) {
+		struct safe_buffer *b =
+			list_entry(entry, struct safe_buffer, node);
+
+		if (b->safe_dma_addr == safe_dma_addr)
+			return b;
+	}
+
+	return NULL;
+}
+
+static inline void
+free_safe_buffer(struct dmabounce_device_info *device_info, struct safe_buffer *buf)
+{
+	dev_dbg(device_info->dev, "%s(buf=%p)\n", __func__, buf);
+
+	list_del(&buf->node);
+
+	if (buf->pool)
+		dma_pool_free(buf->pool, buf->safe, buf->safe_dma_addr);
+	else
+		dma_free_coherent(device_info->dev, buf->size, buf->safe,
+				    buf->safe_dma_addr);
+
+	kfree(buf);
+}
+
+/* ************************************************** */
+
+#ifdef STATS
+
+static void print_map_stats(struct dmabounce_device_info *device_info)
+{
+	printk(KERN_INFO
+		"%s: dmabounce: map_op_count=%lu, bounce_count=%lu\n",
+		device_info->dev->bus_id,
+		device_info->map_op_count, device_info->bounce_count);
+}
+#endif
+
+static inline dma_addr_t
+map_single(struct device *dev, void *ptr, size_t size,
+		enum dma_data_direction dir)
+{
+	dma_addr_t dma_addr;
+	struct dmabounce_device_info *device_info = find_dmabounce_dev(dev);
+
+	if (device_info)
+		DO_STATS ( device_info->map_op_count++ );
+
+	if (dev->dma_mask) {
+		unsigned long limit;
+
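+		/*
+		 * For a mask of 2^n - 1 this yields 2^n, the largest
+		 * mappable size; a full 32-bit mask yields 0 ("no limit").
+		 */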
+		limit = (*dev->dma_mask + 1) & ~(*dev->dma_mask);
+		if (limit && (size > limit)) {
+			dev_err(dev, "DMA mapping too big "
+				"(requested %#x mask %#Lx)\n",
+				size, *dev->dma_mask);
+			return ~0;
+		}
+	}
+
+	dma_addr = virt_to_bus(ptr);
+
+	if (device_info && dma_needs_bounce(dev, dma_addr, size)) {
+		struct safe_buffer *buf;
+
+		buf = alloc_safe_buffer(device_info, ptr, size, dir);
+		if (buf == NULL) {
+			dev_err(dev, "%s: unable to map unsafe buffer %p!\n",
+			       __func__, ptr);
+			return ~0;
+		}
+
+		dev_dbg(dev,
+			"%s: unsafe buffer %p (phy=%p) mapped to %p (phy=%p)\n",
+			__func__, buf->ptr, (void *) virt_to_bus(buf->ptr),
+			buf->safe, (void *) buf->safe_dma_addr);
+
+		if ((dir == DMA_TO_DEVICE) ||
+		    (dir == DMA_BIDIRECTIONAL)) {
+			dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n",
+				__func__, ptr, buf->safe, size);
+			memcpy(buf->safe, ptr, size);
+		}
+		consistent_sync(buf->safe, size, dir);
+
+		dma_addr = buf->safe_dma_addr;
+	} else {
+		consistent_sync(ptr, size, dir);
+	}
+
+	return dma_addr;
+}
+
+static inline void
+unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		enum dma_data_direction dir)
+{
+	struct dmabounce_device_info *device_info = find_dmabounce_dev(dev);
+	struct safe_buffer *buf = NULL;
+
+	/*
+	 * Trying to unmap an invalid mapping
+	 */
+	if (dma_addr == ~0) {
+		dev_err(dev, "Trying to unmap invalid mapping\n");
+		return;
+	}
+
+	if (device_info)
+		buf = find_safe_buffer(device_info, dma_addr);
+
+	if (buf) {
+		BUG_ON(buf->size != size);
+
+		dev_dbg(dev,
+			"%s: unsafe buffer %p (phy=%p) mapped to %p (phy=%p)\n",
+			__func__, buf->ptr, (void *) virt_to_bus(buf->ptr),
+			buf->safe, (void *) buf->safe_dma_addr);
+
+
+		DO_STATS ( device_info->bounce_count++ );
+
+		if ((dir == DMA_FROM_DEVICE) ||
+		    (dir == DMA_BIDIRECTIONAL)) {
+			dev_dbg(dev,
+				"%s: copy back safe %p to unsafe %p size %d\n",
+				__func__, buf->safe, buf->ptr, size);
+			memcpy(buf->ptr, buf->safe, size);
+		}
+		free_safe_buffer(device_info, buf);
+	}
+}
+
+static inline void
+sync_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		enum dma_data_direction dir)
+{
+	struct dmabounce_device_info *device_info = find_dmabounce_dev(dev);
+	struct safe_buffer *buf = NULL;
+
+	if (device_info)
+		buf = find_safe_buffer(device_info, dma_addr);
+
+	if (buf) {
+		/*
+		 * Both of these checks from original code need to be
+		 * commented out b/c some drivers rely on the following:
+		 *
+		 * 1) Drivers may map a large chunk of memory into DMA space
+		 *    but only sync a small portion of it. Good example is
+		 *    allocating a large buffer, mapping it, and then
+		 *    breaking it up into small descriptors. No point
+		 *    in syncing the whole buffer if you only have to
+		 *    touch one descriptor.
+		 *
+		 * 2) Buffers that are mapped as DMA_BIDIRECTIONAL are
+		 *    usually only synced in one dir at a time.
+		 *
+		 * See drivers/net/eepro100.c for examples of both cases.
+		 *
+		 * -ds
+		 *
+		 * BUG_ON(buf->size != size);
+		 * BUG_ON(buf->direction != dir);
+		 */
+
+		dev_dbg(dev,
+			"%s: unsafe buffer %p (phy=%p) mapped to %p (phy=%p)\n",
+			__func__, buf->ptr, (void *) virt_to_bus(buf->ptr),
+			buf->safe, (void *) buf->safe_dma_addr);
+
+		DO_STATS ( device_info->bounce_count++ );
+
+		switch (dir) {
+		case DMA_FROM_DEVICE:
+			dev_dbg(dev,
+				"%s: copy back safe %p to unsafe %p size %d\n",
+				__func__, buf->safe, buf->ptr, size);
+			memcpy(buf->ptr, buf->safe, size);
+			break;
+		case DMA_TO_DEVICE:
+			dev_dbg(dev,
+				"%s: copy out unsafe %p to safe %p, size %d\n",
+				__func__,buf->ptr, buf->safe, size);
+			memcpy(buf->safe, buf->ptr, size);
+			break;
+		case DMA_BIDIRECTIONAL:
+			BUG();	/* is this allowed?  what does it mean? */
+		default:
+			BUG();
+		}
+		consistent_sync(buf->safe, size, dir);
+	} else {
+		consistent_sync(bus_to_virt(dma_addr), size, dir);
+	}
+}
+
+/* ************************************************** */
+
+/*
+ * see if a buffer address is in an 'unsafe' range.  if it is
+ * allocate a 'safe' buffer and copy the unsafe buffer into it.
+ * substitute the safe buffer for the unsafe one.
+ * (basically move the buffer from an unsafe area to a safe one)
+ */
+dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+		enum dma_data_direction dir)
+{
+	unsigned long flags;
+	dma_addr_t dma_addr;
+
+	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
+		__func__, ptr, size, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	dma_addr = map_single(dev, ptr, size, dir);
+
+	local_irq_restore(flags);
+
+	return dma_addr;
+}
+
+/*
+ * see if a mapped address was really a "safe" buffer and if so, copy
+ * the data from the safe buffer back to the unsafe buffer and free up
+ * the safe buffer.  (basically return things back to the way they
+ * should be)
+ */
+
+void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+			enum dma_data_direction dir)
+{
+	unsigned long flags;
+
+	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
+		__func__, (void *) dma_addr, size, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	unmap_single(dev, dma_addr, size, dir);
+
+	local_irq_restore(flags);
+}
+
+int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir)
+{
+	unsigned long flags;
+	int i;
+
+	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
+		__func__, sg, nents, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nents; i++, sg++) {
+		struct page *page = sg->page;
+		unsigned int offset = sg->offset;
+		unsigned int length = sg->length;
+		void *ptr = page_address(page) + offset;
+
+		sg->dma_address =
+			map_single(dev, ptr, length, dir);
+	}
+
+	local_irq_restore(flags);
+
+	return nents;
+}
+
+void
+dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir)
+{
+	unsigned long flags;
+	int i;
+
+	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
+		__func__, sg, nents, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nents; i++, sg++) {
+		dma_addr_t dma_addr = sg->dma_address;
+		unsigned int length = sg->length;
+
+		unmap_single(dev, dma_addr, length, dir);
+	}
+
+	local_irq_restore(flags);
+}
+
+void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size,
+				enum dma_data_direction dir)
+{
+	unsigned long flags;
+
+	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
+		__func__, (void *) dma_addr, size, dir);
+
+	local_irq_save(flags);
+
+	sync_single(dev, dma_addr, size, dir);
+
+	local_irq_restore(flags);
+}
+
+void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
+				enum dma_data_direction dir)
+{
+	unsigned long flags;
+
+	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
+		__func__, (void *) dma_addr, size, dir);
+
+	local_irq_save(flags);
+
+	sync_single(dev, dma_addr, size, dir);
+
+	local_irq_restore(flags);
+}
+
+void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir)
+{
+	unsigned long flags;
+	int i;
+
+	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
+		__func__, sg, nents, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nents; i++, sg++) {
+		dma_addr_t dma_addr = sg->dma_address;
+		unsigned int length = sg->length;
+
+		sync_single(dev, dma_addr, length, dir);
+	}
+
+	local_irq_restore(flags);
+}
+
+void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir)
+{
+	unsigned long flags;
+	int i;
+
+	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
+		__func__, sg, nents, dir);
+
+	BUG_ON(dir == DMA_NONE);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nents; i++, sg++) {
+		dma_addr_t dma_addr = sg->dma_address;
+		unsigned int length = sg->length;
+
+		sync_single(dev, dma_addr, length, dir);
+	}
+
+	local_irq_restore(flags);
+}
+
+int
+dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size,
+			unsigned long large_buffer_size)
+{
+	struct dmabounce_device_info *device_info;
+
+	device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
+	if (!device_info) {
+		printk(KERN_ERR
+			"Could not allocated dmabounce_device_info for %s",
+			dev->bus_id);
+		return -ENOMEM;
+	}
+
+	device_info->small_buffer_pool =
+		dma_pool_create("small_dmabounce_pool",
+				dev,
+				small_buffer_size,
+				0 /* byte alignment */,
+				0 /* no page-crossing issues */);
+	if (!device_info->small_buffer_pool) {
+		printk(KERN_ERR
+			"dmabounce: could not allocate small DMA pool for %s\n",
+			dev->bus_id);
+		kfree(device_info);
+		return -ENOMEM;
+	}
+
+	device_info->large_buffer_pool = NULL;
+	if (large_buffer_size) {
+		device_info->large_buffer_pool =
+			dma_pool_create("large_dmabounce_pool",
+					dev,
+					large_buffer_size,
+					0 /* byte alignment */,
+					0 /* no page-crossing issues */);
+		if (!device_info->large_buffer_pool) {
+			printk(KERN_ERR
+				"dmabounce: could not allocate large DMA pool for %s\n",
+				dev->bus_id);
+			dma_pool_destroy(device_info->small_buffer_pool);
+			kfree(device_info);
+			return -ENOMEM;
+		}
+	}
+
+	device_info->dev = dev;
+	device_info->small_buffer_size = small_buffer_size;
+	device_info->large_buffer_size = large_buffer_size;
+	INIT_LIST_HEAD(&device_info->safe_buffers);
+
+#ifdef STATS
+	device_info->sbp_allocs = 0;
+	device_info->lbp_allocs = 0;
+	device_info->total_allocs = 0;
+	device_info->map_op_count = 0;
+	device_info->bounce_count = 0;
+#endif
+
+	list_add(&device_info->node, &dmabounce_devs);
+
+	printk(KERN_INFO "dmabounce: registered device %s on %s bus\n",
+		dev->bus_id, dev->bus->name);
+
+	return 0;
+}
+
+void
+dmabounce_unregister_dev(struct device *dev)
+{
+	struct dmabounce_device_info *device_info = find_dmabounce_dev(dev);
+
+	if (!device_info) {
+		printk(KERN_WARNING
+			"%s: Never registered with dmabounce but attempting" \
+			"to unregister!\n", dev->bus_id);
+		return;
+	}
+
+	if (!list_empty(&device_info->safe_buffers)) {
+		printk(KERN_ERR
+			"%s: Removing from dmabounce with pending buffers!\n",
+			dev->bus_id);
+		BUG();
+	}
+
+	if (device_info->small_buffer_pool)
+		dma_pool_destroy(device_info->small_buffer_pool);
+	if (device_info->large_buffer_pool)
+		dma_pool_destroy(device_info->large_buffer_pool);
+
+#ifdef STATS
+	print_alloc_stats(device_info);
+	print_map_stats(device_info);
+#endif
+
+	list_del(&device_info->node);
+
+	kfree(device_info);
+
+	printk(KERN_INFO "dmabounce: device %s on %s bus unregistered\n",
+		dev->bus_id, dev->bus->name);
+}
+
+
+EXPORT_SYMBOL(dma_map_single);
+EXPORT_SYMBOL(dma_unmap_single);
+EXPORT_SYMBOL(dma_map_sg);
+EXPORT_SYMBOL(dma_unmap_sg);
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+EXPORT_SYMBOL(dma_sync_single_for_device);
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+EXPORT_SYMBOL(dmabounce_register_dev);
+EXPORT_SYMBOL(dmabounce_unregister_dev);
+
+MODULE_AUTHOR("Christopher Hoover <ch@hpl.hp.com>, Deepak Saxena <dsaxena@plexity.net>");
+MODULE_DESCRIPTION("Special dma_{map/unmap/dma_sync}_* routines for systems with limited DMA windows");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/common/sa1111-pcibuf.c b/arch/arm/common/sa1111-pcibuf.c
deleted file mode 100644
index ed84eb5883e8863e722bf378f7c8e82836c967c7..0000000000000000000000000000000000000000
--- a/arch/arm/common/sa1111-pcibuf.c
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- *  linux/arch/arm/mach-sa1100/sa1111-pcibuf.c
- *
- *  Special dma_{map/unmap/dma_sync}_* routines for SA-1111.
- *
- *  These functions utilize bouncer buffers to compensate for a bug in
- *  the SA-1111 hardware which don't allow DMA to/from addresses
- *  certain addresses above 1MB.
- *
- *  Re-written by Christopher Hoover <ch@murgatroid.com>
- *  Original version by Brad Parker (brad@heeltoe.com)
- *
- *  Copyright (C) 2002 Hewlett Packard Company.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  version 2 as published by the Free Software Foundation.
- * */
-
-//#define DEBUG
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmapool.h>
-#include <asm/hardware/sa1111.h>
-
-//#define STATS
-#ifdef STATS
-#define DO_STATS(X) do { X ; } while (0)
-#else
-#define DO_STATS(X) do { } while (0)
-#endif
-
-/* ************************************************** */
-
-struct safe_buffer {
-	struct list_head node;
-
-	/* original request */
-	void		*ptr;
-	size_t		size;
-	enum dma_data_direction direction;
-
-	/* safe buffer info */
-	struct dma_pool *pool;
-	void		*safe;
-	dma_addr_t	safe_dma_addr;
-	struct device	*dev;
-};
-
-static LIST_HEAD(safe_buffers);
-
-
-#define SIZE_SMALL	1024
-#define SIZE_LARGE	(4*1024)
-
-static struct dma_pool *small_buffer_pool, *large_buffer_pool;
-
-#ifdef STATS
-static unsigned long sbp_allocs __initdata = 0;
-static unsigned long lbp_allocs __initdata = 0;
-static unsigned long total_allocs __initdata= 0;
-
-static void print_alloc_stats(void)
-{
-	printk(KERN_INFO
-	       "sa1111_dmabuf: sbp: %lu, lbp: %lu, other: %lu, total: %lu\n",
-	       sbp_allocs, lbp_allocs,
-	       total_allocs - sbp_allocs - lbp_allocs, total_allocs);
-}
-#endif
-
-static int __init create_safe_buffer_pools(void)
-{
-	small_buffer_pool = dma_pool_create("sa1111_small_dma_buffer",
-					    NULL, SIZE_SMALL,
-					    0 /* byte alignment */,
-					    0 /* no page-crossing issues */);
-	if (small_buffer_pool == NULL) {
-		printk(KERN_ERR
-		       "sa1111_dmabuf: could not allocate small pci pool\n");
-		return -ENOMEM;
-	}
-
-	large_buffer_pool = dma_pool_create("sa1111_large_dma_buffer",
-					    NULL, SIZE_LARGE,
-					    0 /* byte alignment */,
-					    0 /* no page-crossing issues */);
-	if (large_buffer_pool == NULL) {
-		printk(KERN_ERR
-		       "sa1111_dmabuf: could not allocate large pci pool\n");
-		dma_pool_destroy(small_buffer_pool);
-		small_buffer_pool = NULL;
-		return -ENOMEM;
-	}
-
-	printk(KERN_INFO "SA1111: DMA buffer sizes: small=%u, large=%u\n",
-	       SIZE_SMALL, SIZE_LARGE);
-
-	return 0;
-}
-
-static void __exit destroy_safe_buffer_pools(void)
-{
-	if (small_buffer_pool)
-		dma_pool_destroy(small_buffer_pool);
-	if (large_buffer_pool)
-		dma_pool_destroy(large_buffer_pool);
-
-	small_buffer_pool = large_buffer_pool = NULL;
-}
-
-
-/* allocate a 'safe' buffer and keep track of it */
-static struct safe_buffer *alloc_safe_buffer(struct device *dev, void *ptr,
-					     size_t size,
-					     enum dma_data_direction dir)
-{
-	struct safe_buffer *buf;
-	struct dma_pool *pool;
-	void *safe;
-	dma_addr_t safe_dma_addr;
-
-	dev_dbg(dev, "%s(ptr=%p, size=%d, direction=%d)\n",
-		__func__, ptr, size, dir);
-
-	DO_STATS ( total_allocs++ );
-
-	buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
-	if (buf == NULL) {
-		printk(KERN_WARNING "%s: kmalloc failed\n", __func__);
-		return 0;
-	}
-
-	if (size <= SIZE_SMALL) {
-		pool = small_buffer_pool;
-		safe = dma_pool_alloc(pool, GFP_ATOMIC, &safe_dma_addr);
-
-		DO_STATS ( sbp_allocs++ );
-	} else if (size <= SIZE_LARGE) {
-		pool = large_buffer_pool;
-		safe = dma_pool_alloc(pool, GFP_ATOMIC, &safe_dma_addr);
-
-		DO_STATS ( lbp_allocs++ );
-	} else {
-		pool = NULL;
-		safe = dma_alloc_coherent(dev, size, &safe_dma_addr, GFP_ATOMIC);
-	}
-
-	if (safe == NULL) {
-		printk(KERN_WARNING
-		       "%s: could not alloc dma memory (size=%d)\n",
-		       __func__, size);
-		kfree(buf);
-		return 0;
-	}
-
-#ifdef STATS
-	if (total_allocs % 1000 == 0)
-		print_alloc_stats();
-#endif
-
-	BUG_ON(sa1111_check_dma_bug(safe_dma_addr));	// paranoia
-
-	buf->ptr = ptr;
-	buf->size = size;
-	buf->direction = dir;
-	buf->pool = pool;
-	buf->safe = safe;
-	buf->safe_dma_addr = safe_dma_addr;
-	buf->dev = dev;
-
-	list_add(&buf->node, &safe_buffers);
-
-	return buf;
-}
-
-/* determine if a buffer is from our "safe" pool */
-static struct safe_buffer *find_safe_buffer(struct device *dev,
-					    dma_addr_t safe_dma_addr)
-{
-	struct list_head *entry;
-
-	list_for_each(entry, &safe_buffers) {
-		struct safe_buffer *b =
-			list_entry(entry, struct safe_buffer, node);
-
-		if (b->safe_dma_addr == safe_dma_addr &&
-		    b->dev == dev) {
-			return b;
-		}
-	}
-
-	return 0;
-}
-
-static void free_safe_buffer(struct safe_buffer *buf)
-{
-	pr_debug("%s(buf=%p)\n", __func__, buf);
-
-	list_del(&buf->node);
-
-	if (buf->pool)
-		dma_pool_free(buf->pool, buf->safe, buf->safe_dma_addr);
-	else
-		dma_free_coherent(buf->dev, buf->size, buf->safe,
-				  buf->safe_dma_addr);
-	kfree(buf);
-}
-
-static inline int dma_range_is_safe(struct device *dev, dma_addr_t addr,
-				    size_t size)
-{
-	unsigned int physaddr = SA1111_DMA_ADDR((unsigned int) addr);
-
-	/* Any address within one megabyte of the start of the target
-         * bank will be OK.  This is an overly conservative test:
-         * other addresses can be OK depending on the dram
-         * configuration.  (See sa1111.c:sa1111_check_dma_bug() * for
-         * details.)
-	 *
-	 * We take care to ensure the entire dma region is within
-	 * the safe range.
-	 */
-
-	return ((physaddr + size - 1) < (1<<20));
-}
-
-/* ************************************************** */
-
-#ifdef STATS
-static unsigned long map_op_count __initdata = 0;
-static unsigned long bounce_count __initdata = 0;
-
-static void print_map_stats(void)
-{
-	printk(KERN_INFO
-	       "sa1111_dmabuf: map_op_count=%lu, bounce_count=%lu\n",
-	       map_op_count, bounce_count);
-}
-#endif
-
-static dma_addr_t map_single(struct device *dev, void *ptr,
-			     size_t size, enum dma_data_direction dir)
-{
-	dma_addr_t dma_addr;
-
-	DO_STATS ( map_op_count++ );
-
-	dma_addr = virt_to_bus(ptr);
-
-	if (!dma_range_is_safe(dev, dma_addr, size)) {
-		struct safe_buffer *buf;
-
-		DO_STATS ( bounce_count++ ) ;
-
-		buf = alloc_safe_buffer(dev, ptr, size, dir);
-		if (buf == NULL) {
-			printk(KERN_ERR
-			       "%s: unable to map unsafe buffer %p!\n",
-			       __func__, ptr);
-			return 0;
-		}
-
-		dev_dbg(dev, "%s: unsafe buffer %p (phy=%08lx) mapped to %p (phy=%08x)\n",
-			__func__,
-			buf->ptr, virt_to_bus(buf->ptr),
-			buf->safe, buf->safe_dma_addr);
-
-		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
-			dev_dbg(dev, "%s: copy out from unsafe %p, to safe %p, size %d\n",
-				__func__, ptr, buf->safe, size);
-			memcpy(buf->safe, ptr, size);
-		}
-
-		dma_addr = buf->safe_dma_addr;
-		ptr = buf->safe;
-	}
-
-	consistent_sync(ptr, size, dir);
-
-#ifdef STATS
-	if (map_op_count % 1000 == 0)
-		print_map_stats();
-#endif
-
-	return dma_addr;
-}
-
-static void unmap_single(struct device *dev, dma_addr_t dma_addr,
-			 size_t size, enum dma_data_direction dir)
-{
-	struct safe_buffer *buf;
-
-	buf = find_safe_buffer(dev, dma_addr);
-
-	if (buf) {
-		BUG_ON(buf->size != size);
-		BUG_ON(buf->direction != dir);
-
-		dev_dbg(dev, "%s: unsafe buffer %p (phy=%08lx) mapped to %p (phy=%08lx)\n",
-			__func__,
-			buf->ptr, virt_to_bus(buf->ptr),
-			buf->safe, buf->safe_dma_addr);
-
-		DO_STATS ( bounce_count++ );
-
-		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
-			dev_dbg(dev, "%s: copy back from safe %p, to unsafe %p size %d\n",
-				__func__, buf->safe, buf->ptr, size);
-			memcpy(buf->ptr, buf->safe, size);
-		}
-		free_safe_buffer(buf);
-	}
-}
-
-static void sync_single(struct device *dev, dma_addr_t dma_addr,
-			size_t size, enum dma_data_direction dir)
-{
-	struct safe_buffer *buf;
-	void *ptr;
-
-	buf = find_safe_buffer(dev, dma_addr);
-
-	if (buf) {
-		BUG_ON(buf->size != size);
-		BUG_ON(buf->direction != dir);
-
-		dev_dbg(dev, "%s: unsafe buffer %p (phy=%08lx) mapped to %p (phy=%08lx)\n",
-			__func__,
-			buf->ptr, virt_to_bus(buf->ptr),
-			buf->safe, buf->safe_dma_addr);
-
-		DO_STATS ( bounce_count++ );
-
-		switch (dir) {
-		case DMA_FROM_DEVICE:
-			dev_dbg(dev, "%s: copy back from safe %p, to unsafe %p size %d\n",
-				__func__, buf->safe, buf->ptr, size);
-			memcpy(buf->ptr, buf->safe, size);
-			break;
-		case DMA_TO_DEVICE:
-			dev_dbg(dev, "%s: copy out from unsafe %p, to safe %p, size %d\n",
-				__func__,buf->ptr, buf->safe, size);
-			memcpy(buf->safe, buf->ptr, size);
-			break;
-		case DMA_BIDIRECTIONAL:
-			BUG();	/* is this allowed?  what does it mean? */
-		default:
-			BUG();
-		}
-		ptr = buf->safe;
-	} else {
-		ptr = bus_to_virt(dma_addr);
-	}
-	consistent_sync(ptr, size, dir);
-}
-
-/* ************************************************** */
-
-/*
- * see if a buffer address is in an 'unsafe' range.  if it is
- * allocate a 'safe' buffer and copy the unsafe buffer into it.
- * substitute the safe buffer for the unsafe one.
- * (basically move the buffer from an unsafe area to a safe one)
- */
-dma_addr_t sa1111_map_single(struct device *dev, void *ptr,
-			     size_t size, enum dma_data_direction dir)
-{
-	unsigned long flags;
-	dma_addr_t dma_addr;
-
-	dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
-	       __func__, ptr, size, dir);
-
-	BUG_ON(dir == DMA_NONE);
-
-	local_irq_save(flags);
-
-	dma_addr = map_single(dev, ptr, size, dir);
-
-	local_irq_restore(flags);
-
-	return dma_addr;
-}
-
-/*
- * see if a mapped address was really a "safe" buffer and if so, copy
- * the data from the safe buffer back to the unsafe buffer and free up
- * the safe buffer.  (basically return things back to the way they
- * should be)
- */
-void sa1111_unmap_single(struct device *dev, dma_addr_t dma_addr,
-		         size_t size, enum dma_data_direction dir)
-{
-	unsigned long flags;
-
-	dev_dbg(dev, "%s(ptr=%08lx,size=%d,dir=%x)\n",
-		__func__, dma_addr, size, dir);
-
-	local_irq_save(flags);
-
-	unmap_single(dev, dma_addr, size, dir);
-
-	local_irq_restore(flags);
-}
-
-int sa1111_map_sg(struct device *dev, struct scatterlist *sg,
-		  int nents, enum dma_data_direction dir)
-{
-	unsigned long flags;
-	int i;
-
-	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
-		__func__, sg, nents, dir);
-
-	BUG_ON(dir == DMA_NONE);
-
-	local_irq_save(flags);
-
-	for (i = 0; i < nents; i++, sg++) {
-		struct page *page = sg->page;
-		unsigned int offset = sg->offset;
-		unsigned int length = sg->length;
-		void *ptr = page_address(page) + offset;
-
-		sg->dma_address = map_single(dev, ptr, length, dir);
-	}
-
-	local_irq_restore(flags);
-
-	return nents;
-}
-
-void sa1111_unmap_sg(struct device *dev, struct scatterlist *sg,
-		     int nents, enum dma_data_direction dir)
-{
-	unsigned long flags;
-	int i;
-
-	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
-		__func__, sg, nents, dir);
-
-	local_irq_save(flags);
-
-	for (i = 0; i < nents; i++, sg++) {
-		dma_addr_t dma_addr = sg->dma_address;
-		unsigned int length = sg->length;
-
-		unmap_single(dev, dma_addr, length, dir);
-	}
-
-	local_irq_restore(flags);
-}
-
-void sa1111_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
-				    size_t size, enum dma_data_direction dir)
-{
-	unsigned long flags;
-
-	dev_dbg(dev, "%s(ptr=%08lx,size=%d,dir=%x)\n",
-		__func__, dma_addr, size, dir);
-
-	local_irq_save(flags);
-
-	sync_single(dev, dma_addr, size, dir);
-
-	local_irq_restore(flags);
-}
-
-void sa1111_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
-				       size_t size, enum dma_data_direction dir)
-{
-	unsigned long flags;
-
-	dev_dbg(dev, "%s(ptr=%08lx,size=%d,dir=%x)\n",
-		__func__, dma_addr, size, dir);
-
-	local_irq_save(flags);
-
-	sync_single(dev, dma_addr, size, dir);
-
-	local_irq_restore(flags);
-}
-
-void sa1111_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction dir)
-{
-	unsigned long flags;
-	int i;
-
-	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
-		__func__, sg, nents, dir);
-
-	local_irq_save(flags);
-
-	for (i = 0; i < nents; i++, sg++) {
-		dma_addr_t dma_addr = sg->dma_address;
-		unsigned int length = sg->length;
-
-		sync_single(dev, dma_addr, length, dir);
-	}
-
-	local_irq_restore(flags);
-}
-
-void sa1111_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-				   int nents, enum dma_data_direction dir)
-{
-	unsigned long flags;
-	int i;
-
-	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
-		__func__, sg, nents, dir);
-
-	local_irq_save(flags);
-
-	for (i = 0; i < nents; i++, sg++) {
-		dma_addr_t dma_addr = sg->dma_address;
-		unsigned int length = sg->length;
-
-		sync_single(dev, dma_addr, length, dir);
-	}
-
-	local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(sa1111_map_single);
-EXPORT_SYMBOL(sa1111_unmap_single);
-EXPORT_SYMBOL(sa1111_map_sg);
-EXPORT_SYMBOL(sa1111_unmap_sg);
-EXPORT_SYMBOL(sa1111_dma_sync_single_for_cpu);
-EXPORT_SYMBOL(sa1111_dma_sync_single_for_device);
-EXPORT_SYMBOL(sa1111_dma_sync_sg_for_cpu);
-EXPORT_SYMBOL(sa1111_dma_sync_sg_for_device);
-
-/* **************************************** */
-
-static int __init sa1111_dmabuf_init(void)
-{
-	printk(KERN_DEBUG "sa1111_dmabuf: initializing SA-1111 DMA buffers\n");
-
-	return create_safe_buffer_pools();
-}
-module_init(sa1111_dmabuf_init);
-
-static void __exit sa1111_dmabuf_exit(void)
-{
-	BUG_ON(!list_empty(&safe_buffers));
-
-#ifdef STATS
-	print_alloc_stats();
-	print_map_stats();
-#endif
-
-	destroy_safe_buffer_pools();
-}
-module_exit(sa1111_dmabuf_exit);
-
-MODULE_AUTHOR("Christopher Hoover <ch@hpl.hp.com>");
-MODULE_DESCRIPTION("Special dma_{map/unmap/dma_sync}_* routines for SA-1111.");
-MODULE_LICENSE("GPL");
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 18b6c61cd81ed68dea8666ad7b901a6fc924cb70..511327e887382894dfbb59a0b8cbf5043681e05d 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -25,6 +25,7 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/hardware.h>
 #include <asm/mach-types.h>
@@ -547,15 +548,6 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
 	snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id),
 		 "%4.4lx", info->offset);
 
-	/*
-	 * If the parent device has a DMA mask associated with it,
-	 * propagate it down to the children.
-	 */
-	if (sachip->dev->dma_mask) {
-		dev->dma_mask = *sachip->dev->dma_mask;
-		dev->dev.dma_mask = &dev->dma_mask;
-	}
-
 	dev->devid	 = info->devid;
 	dev->dev.parent  = sachip->dev;
 	dev->dev.bus     = &sa1111_bus_type;
@@ -573,15 +565,36 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
 	if (ret) {
 		printk("SA1111: failed to allocate resource for %s\n",
 			dev->res.name);
+		kfree(dev);
 		goto out;
 	}
 
 	ret = device_register(&dev->dev);
 	if (ret) {
 		release_resource(&dev->res);
- out:
 		kfree(dev);
+		goto out;
+	}
+
+	/*
+	 * If the parent device has a DMA mask associated with it,
+	 * propagate it down to the children.
+	 */
+	if (sachip->dev->dma_mask) {
+		dev->dma_mask = *sachip->dev->dma_mask;
+		dev->dev.dma_mask = &dev->dma_mask;
+
+		if (dev->dma_mask != 0xffffffffUL) {
+			ret = dmabounce_register_dev(&dev->dev, 1024, 4096);
+			if (ret) {
+				printk("SA1111: Failed to register %s with dmabounce", dev->dev.bus_id);
+				kfree(dev);
+				device_unregister(dev);
+			}
+		}
 	}
+
+out:
 	return ret;
 }
 
@@ -742,61 +756,30 @@ static void __sa1111_remove(struct sa1111 *sachip)
  *
  * This routine only identifies whether or not a given DMA address
  * is susceptible to the bug.
+ *
+ * This should only get called for sa1111_device types due to the
+ * way we configure our device dma_masks.
  */
-int sa1111_check_dma_bug(dma_addr_t addr)
+int dma_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
 {
-	struct sa1111 *sachip = g_sa1111;
-	unsigned int physaddr = SA1111_DMA_ADDR((unsigned int)addr);
-	unsigned int smcr;
+	u32 dma_mask = *dev->dma_mask;
 
-	/* Section 4.6 of the "Intel StrongARM SA-1111 Development Module
+	/*
+	 * Section 4.6 of the "Intel StrongARM SA-1111 Development Module
 	 * User's Guide" mentions that jumpers R51 and R52 control the
 	 * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or
 	 * SDRAM bank 1 on Neponset). The default configuration selects
 	 * Assabet, so any address in bank 1 is necessarily invalid.
 	 */
-	if ((machine_is_assabet() || machine_is_pfs168()) && addr >= 0xc8000000)
-	  	return -1;
+	if ((machine_is_assabet() || machine_is_pfs168()) &&
+	    (addr >= 0xc8000000 || (addr + size - 1) >= 0xc8000000))
+		return 1;
 
-	/* The bug only applies to buffers located more than one megabyte
-	 * above the start of the target bank:
+	/*
+	 * Check to see if either the start or end are illegal.
 	 */
-	if (physaddr<(1<<20))
-		return 0;
-
-	smcr = sa1111_readl(sachip->base + SA1111_SMCR);
-	switch (FExtr(smcr, SMCR_DRAC)) {
-	case 01: /* 10 row + bank address bits, A<20> must not be set */
-	  	if (physaddr & (1<<20))
-		  	return -1;
-		break;
-	case 02: /* 11 row + bank address bits, A<23> must not be set */
-	  	if (physaddr & (1<<23))
-		  	return -1;
-		break;
-	case 03: /* 12 row + bank address bits, A<24> must not be set */
-	  	if (physaddr & (1<<24))
-		  	return -1;
-		break;
-	case 04: /* 13 row + bank address bits, A<25> must not be set */
-	  	if (physaddr & (1<<25))
-		  	return -1;
-		break;
-	case 05: /* 14 row + bank address bits, A<20> must not be set */
-	  	if (physaddr & (1<<20))
-		  	return -1;
-		break;
-	case 06: /* 15 row + bank address bits, A<20> must not be set */
-	  	if (physaddr & (1<<20))
-		  	return -1;
-		break;
-	default:
-	  	printk(KERN_ERR "%s(): invalid SMCR DRAC value 0%lo\n",
-		       __FUNCTION__, FExtr(smcr, SMCR_DRAC));
-		return -1;
-	}
-
-	return 0;
+	return (addr & ~dma_mask) ||
+		((addr + size - 1) & ~dma_mask);
 }
 
 struct sa1111_save_data {
@@ -1293,7 +1277,6 @@ module_exit(sa1111_exit);
 MODULE_DESCRIPTION("Intel Corporation SA1111 core driver");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(sa1111_check_dma_bug);
 EXPORT_SYMBOL(sa1111_select_audio_mode);
 EXPORT_SYMBOL(sa1111_set_audio_rate);
 EXPORT_SYMBOL(sa1111_get_audio_rate);
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index 595fdd7451b602fed8b655f29af431999c36a86e..dda22e24ce767c8772da56e5038446f83f603fe4 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -44,7 +44,7 @@ static inline void atomic_set(atomic_t *v, int i)
 	: "cc");
 }
 
-static inline void atomic_add(int i, volatile atomic_t *v)
+static inline void atomic_add(int i, atomic_t *v)
 {
 	unsigned long tmp, tmp2;
 
@@ -59,7 +59,7 @@ static inline void atomic_add(int i, volatile atomic_t *v)
 	: "cc");
 }
 
-static inline void atomic_sub(int i, volatile atomic_t *v)
+static inline void atomic_sub(int i, atomic_t *v)
 {
 	unsigned long tmp, tmp2;
 
@@ -77,7 +77,7 @@ static inline void atomic_sub(int i, volatile atomic_t *v)
 #define atomic_inc(v)	atomic_add(1, v)
 #define atomic_dec(v)	atomic_sub(1, v)
 
-static inline int atomic_dec_and_test(volatile atomic_t *v)
+static inline int atomic_dec_and_test(atomic_t *v)
 {
 	unsigned long tmp;
 	int result;
@@ -95,7 +95,7 @@ static inline int atomic_dec_and_test(volatile atomic_t *v)
 	return result == 0;
 }
 
-static inline int atomic_add_negative(int i, volatile atomic_t *v)
+static inline int atomic_add_negative(int i, atomic_t *v)
 {
 	unsigned long tmp;
 	int result;
@@ -138,7 +138,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 
 #define atomic_set(v,i)	(((v)->counter) = (i))
 
-static inline void atomic_add(int i, volatile atomic_t *v)
+static inline void atomic_add(int i, atomic_t *v)
 {
 	unsigned long flags;
 
@@ -147,7 +147,7 @@ static inline void atomic_add(int i, volatile atomic_t *v)
 	local_irq_restore(flags);
 }
 
-static inline void atomic_sub(int i, volatile atomic_t *v)
+static inline void atomic_sub(int i, atomic_t *v)
 {
 	unsigned long flags;
 
@@ -156,7 +156,7 @@ static inline void atomic_sub(int i, volatile atomic_t *v)
 	local_irq_restore(flags);
 }
 
-static inline void atomic_inc(volatile atomic_t *v)
+static inline void atomic_inc(atomic_t *v)
 {
 	unsigned long flags;
 
@@ -165,7 +165,7 @@ static inline void atomic_inc(volatile atomic_t *v)
 	local_irq_restore(flags);
 }
 
-static inline void atomic_dec(volatile atomic_t *v)
+static inline void atomic_dec(atomic_t *v)
 {
 	unsigned long flags;
 
@@ -174,7 +174,7 @@ static inline void atomic_dec(volatile atomic_t *v)
 	local_irq_restore(flags);
 }
 
-static inline int atomic_dec_and_test(volatile atomic_t *v)
+static inline int atomic_dec_and_test(atomic_t *v)
 {
 	unsigned long flags;
 	int val;
@@ -187,7 +187,7 @@ static inline int atomic_dec_and_test(volatile atomic_t *v)
 	return val == 0;
 }
 
-static inline int atomic_add_negative(int i, volatile atomic_t *v)
+static inline int atomic_add_negative(int i, atomic_t *v)
 {
 	unsigned long flags;
 	int val;
diff --git a/include/asm-arm/div64.h b/include/asm-arm/div64.h
index 4957da3df2706371a44b6956b1ef19a585ec1b67..3682616804ca42ef67014f9f400c97daaa1bf9de 100644
--- a/include/asm-arm/div64.h
+++ b/include/asm-arm/div64.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ARM_DIV64
 #define __ASM_ARM_DIV64
 
+#include <asm/system.h>
+
 /*
  * The semantics of do_div() are:
  *
@@ -31,7 +33,11 @@
 	register unsigned long long __n   asm("r0") = n;	\
 	register unsigned long long __res asm("r2");		\
 	register unsigned int __rem       asm(__xh);		\
-	asm("bl	__do_div64"					\
+	asm(	__asmeq("%0", __xh)				\
+		__asmeq("%1", "r2")				\
+		__asmeq("%2", "r0")				\
+		__asmeq("%3", "r4")				\
+		"bl	__do_div64"				\
 		: "=r" (__rem), "=r" (__res)			\
 		: "r" (__n), "r" (__base)			\
 		: "ip", "lr", "cc");				\
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index c65d9e38ddc7e6fc1cb8524fe8f5101591953c04..011c539c7449bc0dc243db2e816dcf55ad0c9225 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -16,29 +16,6 @@
  */
 extern void consistent_sync(void *kaddr, size_t size, int rw);
 
-/*
- * For SA-1111 these functions are "magic" and utilize bounce
- * bufferes as needed to work around SA-1111 DMA bugs.
- */
-dma_addr_t sa1111_map_single(struct device *dev, void *, size_t, enum dma_data_direction);
-void sa1111_unmap_single(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
-int sa1111_map_sg(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
-void sa1111_unmap_sg(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
-void sa1111_dma_sync_single_for_cpu(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
-void sa1111_dma_sync_single_for_device(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
-void sa1111_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
-void sa1111_dma_sync_sg_for_device(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
-
-#ifdef CONFIG_SA1111
-
-extern struct bus_type sa1111_bus_type;
-
-#define dmadev_is_sa1111(dev)	((dev)->bus == &sa1111_bus_type)
-
-#else
-#define dmadev_is_sa1111(dev)	(0)
-#endif
-
 /*
  * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
@@ -70,6 +47,22 @@ static inline int dma_is_consistent(dma_addr_t handle)
 	return 0;
 }
 
+/*
+ * DMA errors are defined by all-bits-set in the DMA address.
+ */
+static inline int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return dma_addr == ~0;
+}
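+
+/*
+ * Typical driver-side usage (sketch):
+ *
+ *	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
+ *	if (dma_mapping_error(dma))
+ *		return -ENOMEM;
+ */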
+
 /**
  * dma_alloc_coherent - allocate consistent memory for DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -132,16 +118,17 @@ dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int
  * can regain ownership by calling dma_unmap_single() or
  * dma_sync_single_for_cpu().
  */
+#ifndef CONFIG_DMABOUNCE
 static inline dma_addr_t
 dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 	       enum dma_data_direction dir)
 {
-	if (dmadev_is_sa1111(dev))
-		return sa1111_map_single(dev, cpu_addr, size, dir);
-
 	consistent_sync(cpu_addr, size, dir);
 	return __virt_to_bus((unsigned long)cpu_addr);
 }
+#else
+extern dma_addr_t dma_map_single(struct device *, void *, size_t, enum dma_data_direction);
+#endif
 
 /**
  * dma_map_page - map a portion of a page for streaming DMA
@@ -180,15 +167,16 @@ dma_map_page(struct device *dev, struct page *page,
  * After this call, reads by the CPU to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+#ifndef CONFIG_DMABOUNCE
 static inline void
 dma_unmap_single(struct device *dev, dma_addr_t handle, size_t size,
 		 enum dma_data_direction dir)
 {
-	if (dmadev_is_sa1111(dev))
-		sa1111_unmap_single(dev, handle, size, dir);
-
 	/* nothing to do */
 }
+#else
+extern void dma_unmap_single(struct device *, dma_addr_t, size_t, enum dma_data_direction);
+#endif
 
 /**
  * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
@@ -233,15 +221,13 @@ dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
  * Device ownership issues as mentioned above for dma_map_single are
  * the same here.
  */
+#ifndef CONFIG_DMABOUNCE
 static inline int
 dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	   enum dma_data_direction dir)
 {
 	int i;
 
-	if (dmadev_is_sa1111(dev))
-		return sa1111_map_sg(dev, sg, nents, dir);
-
 	for (i = 0; i < nents; i++, sg++) {
 		char *virt;
 
@@ -252,6 +238,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 	return nents;
 }
+#else
+extern int dma_map_sg(struct device *, struct scatterlist *, int, enum dma_data_direction);
+#endif
 
 /**
  * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
@@ -264,17 +253,17 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
  * Again, CPU read rules concerning calls here are the same as for
  * dma_unmap_single() above.
  */
+#ifndef CONFIG_DMABOUNCE
 static inline void
 dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 	     enum dma_data_direction dir)
 {
-	if (dmadev_is_sa1111(dev)) {
-		sa1111_unmap_sg(dev, sg, nents, dir);
-		return;
-	}
 
 	/* nothing to do */
 }
+#else
+extern void dma_unmap_sg(struct device *, struct scatterlist *, int, enum dma_data_direction);
+#endif
 
 /**
  * dma_sync_single_for_cpu
@@ -293,15 +283,11 @@ dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
  * must first the perform a dma_sync_for_device, and then the
  * device again owns the buffer.
  */
+#ifndef CONFIG_DMABOUNCE
 static inline void
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size,
 			enum dma_data_direction dir)
 {
-	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_single_for_cpu(dev, handle, size, dir);
-		return;
-	}
-
 	consistent_sync((void *)__bus_to_virt(handle), size, dir);
 }
 
@@ -309,13 +295,12 @@ static inline void
 dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size,
 			   enum dma_data_direction dir)
 {
-	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_single_for_device(dev, handle, size, dir);
-		return;
-	}
-
 	consistent_sync((void *)__bus_to_virt(handle), size, dir);
 }
+#else
+extern void dma_sync_single_for_cpu(struct device*, dma_addr_t, size_t, enum dma_data_direction);
+extern void dma_sync_single_for_device(struct device*, dma_addr_t, size_t, enum dma_data_direction);
+#endif
 
 /**
  * dma_sync_sg_for_cpu
@@ -330,17 +316,13 @@ dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size,
  * The same as dma_sync_single_for_* but for a scatter-gather list,
  * same rules and usage.
  */
+#ifndef CONFIG_DMABOUNCE
 static inline void
 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
 		    enum dma_data_direction dir)
 {
 	int i;
 
-	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-		return;
-	}
-
 	for (i = 0; i < nents; i++, sg++) {
 		char *virt = page_address(sg->page) + sg->offset;
 		consistent_sync(virt, sg->length, dir);
@@ -353,24 +335,88 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
 {
 	int i;
 
-	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_sg_for_device(dev, sg, nents, dir);
-		return;
-	}
-
 	for (i = 0; i < nents; i++, sg++) {
 		char *virt = page_address(sg->page) + sg->offset;
 		consistent_sync(virt, sg->length, dir);
 	}
 }
+#else
+extern void dma_sync_sg_for_cpu(struct device*, struct scatterlist*, int, enum dma_data_direction);
+extern void dma_sync_sg_for_device(struct device*, struct scatterlist*, int, enum dma_data_direction);
+#endif
 
+#ifdef CONFIG_DMABOUNCE
 /*
- * DMA errors are defined by all-bits-set in the DMA address.
+ * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic"
+ * and utilize bounce buffers as needed to work around limited DMA windows.
+ *
+ * On the SA-1111, a bug limits DMA to only certain regions of RAM.
+ * On the IXP425, the PCI inbound window is 64MB (256MB total RAM).
+ * On some ADI engineering systems, the PCI inbound window is 32MB (12MB total RAM).
+ *
+ * The following are helper functions used by the dmabounce subsystem.
+ *
  */
-static inline int dma_mapping_error(dma_addr_t dma_addr)
-{
-	return dma_addr == ~0;
-}
+
+/**
+ * dmabounce_register_dev
+ *
+ * @dev: valid struct device pointer
+ * @small_buf_size: size of buffers to use with small buffer pool
+ * @large_buf_size: size of buffers to use with large buffer pool (can be 0)
+ *
+ * This function should be called by low-level platform code to register
+ * a device as requiring DMA buffer bouncing. The function will allocate
+ * appropriate DMA pools for the device.
+ *
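+ * A minimal sketch of a caller (the SA-1111 code registers its child
+ * devices like this):
+ *
+ *	if (dmabounce_register_dev(&sadev->dev, 1024, 4096))
+ *		printk(KERN_ERR "sa1111: dmabounce registration failed\n");
+ *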
+ */
+extern int dmabounce_register_dev(struct device *, unsigned long, unsigned long);
+
+/**
+ * dmabounce_unregister_dev
+ *
+ * @dev: valid struct device pointer
+ *
+ * This function should be called by low-level platform code when a
+ * device that was previously registered with dmabounce_register_dev
+ * is removed from the system.
+ *
+ */
+extern void dmabounce_unregister_dev(struct device *);
+
+/**
+ * dma_needs_bounce
+ *
+ * @dev: valid struct device pointer
+ * @dma_handle: dma_handle of unbounced buffer
+ * @size: size of region being mapped
+ *
+ * Platforms that utilize the dmabounce mechanism must implement
+ * this function.
+ *
+ * The dmabounce routines call this function whenever a dma-mapping
+ * is requested to determine whether a given buffer needs to be bounced
+ * or not. The function must return 0 if the buffer is OK for
+ * DMA access and 1 if the buffer needs to be bounced.
+ *
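+ * One possible implementation, keyed purely off the device's DMA mask
+ * (roughly what the SA-1111 support code does):
+ *
+ *	int dma_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
+ *	{
+ *		return (addr & ~(*dev->dma_mask)) ||
+ *			((addr + size - 1) & ~(*dev->dma_mask));
+ *	}
+ *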
+ */
+extern int dma_needs_bounce(struct device*, dma_addr_t, size_t);
+#endif /* CONFIG_DMABOUNCE */
 
 #endif /* __KERNEL__ */
 #endif
diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h
index 4e19a5719fe82757db30d314ab8e4d9f1ada2292..dc7ef45d3b4e57c80b58bf783ccad0b444975c53 100644
--- a/include/asm-arm/system.h
+++ b/include/asm-arm/system.h
@@ -42,6 +42,19 @@
 #define CR_XP	(1 << 23)	/* Extended page tables			*/
 #define CR_VE	(1 << 24)	/* Vectored interrupts			*/
 
+/*
+ * This is used to ensure the compiler did actually allocate the register we
+ * asked it for in some inline assembly sequences.  Apparently we can't trust
+ * the compiler from one version to another so a bit of paranoia won't hurt.
+ * This string is meant to be concatenated with the inline asm string and
+ * will cause compilation to stop on mismatch.
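+ *
+ * For example, __asmeq("%0", "r0") expands to ".ifnc %0,r0 ; .err ; .endif\n\t",
+ * which assembles to nothing if the compiler allocated r0 to operand 0,
+ * and to a hard assembler error if it picked any other register.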
+ */
+#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+
 #ifndef __ASSEMBLY__
 
 #include <linux/kernel.h>
diff --git a/include/asm-arm/uaccess.h b/include/asm-arm/uaccess.h
index 3703b43c2f570cb9ce4f5724ba4bd406d9d91ccd..119745a6dd7c6c9e82c72bc38e563f154bf50824 100644
--- a/include/asm-arm/uaccess.h
+++ b/include/asm-arm/uaccess.h
@@ -15,6 +15,7 @@
 #include <asm/errno.h>
 #include <asm/arch/memory.h>
 #include <asm/domain.h>
+#include <asm/system.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -107,7 +108,9 @@ extern int __get_user_8(void *);
 extern int __get_user_bad(void);
 
 #define __get_user_x(__r1,__p,__e,__s,__i...)				\
-	   __asm__ __volatile__ ("bl	__get_user_" #__s		\
+	   __asm__ __volatile__ (					\
+		__asmeq("%0", "r0") __asmeq("%1", "r1")			\
+		"bl	__get_user_" #__s				\
 		: "=&r" (__e), "=r" (__r1)				\
 		: "0" (__p)						\
 		: __i, "cc")
@@ -223,7 +226,9 @@ extern int __put_user_8(void *, unsigned long long);
 extern int __put_user_bad(void);
 
 #define __put_user_x(__r1,__p,__e,__s)					\
-	   __asm__ __volatile__ ("bl	__put_user_" #__s		\
+	   __asm__ __volatile__ (					\
+		__asmeq("%0", "r0") __asmeq("%2", "r1")			\
+		"bl	__put_user_" #__s				\
 		: "=&r" (__e)						\
 		: "0" (__p), "r" (__r1)					\
 		: "ip", "lr", "cc")