Commit d0055df0 authored by Michael Ellerman's avatar Michael Ellerman

Merge branch 'topic/dma' into next

Merge hch's big DMA rework series. This is in a topic branch in case he
wants to merge it to minimise conflicts.
parents 637cfeb9 4a605e2d
......@@ -119,9 +119,6 @@ config GENERIC_HWEIGHT
bool
default y
config ARCH_HAS_DMA_SET_COHERENT_MASK
bool
config PPC
bool
default y
......@@ -130,7 +127,6 @@ config PPC
#
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
......@@ -887,6 +883,7 @@ config FSL_SOC
config FSL_PCI
bool
select ARCH_HAS_DMA_SET_MASK
select PPC_INDIRECT_PCI
select PCI_QUIRKS
......
......@@ -19,6 +19,11 @@ struct iommu_table;
* drivers/macintosh/macio_asic.c
*/
struct dev_archdata {
/*
* Set to %true if the dma_iommu_ops are requested to use a direct
* window instead of dynamically mapping memory.
*/
bool iommu_bypass : 1;
/*
* These two used to be a union. However, with the hybrid ops we need
* both so here we store both a DMA offset for direct mappings and
......@@ -33,9 +38,6 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu_domain;
#endif
#ifdef CONFIG_SWIOTLB
dma_addr_t max_direct_dma_addr;
#endif
#ifdef CONFIG_PPC64
struct pci_dn *pci_data;
#endif
......@@ -54,6 +56,4 @@ struct pdev_archdata {
u64 dma_mask;
};
#define ARCH_HAS_DMA_GET_REQUIRED_MASK
#endif /* _ASM_POWERPC_DEVICE_H */
......@@ -4,26 +4,24 @@
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
#ifdef CONFIG_SWIOTLB
struct dev_archdata *sd = &dev->archdata;
if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
return false;
#endif
if (!dev->dma_mask)
return false;
return addr + size - 1 <= *dev->dma_mask;
return addr + size - 1 <=
min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
}
static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
return paddr + get_dma_offset(dev);
if (!dev)
return paddr + PCI_DRAM_OFFSET;
return paddr + dev->archdata.dma_offset;
}
static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
return daddr - get_dma_offset(dev);
if (!dev)
return daddr - PCI_DRAM_OFFSET;
return daddr - dev->archdata.dma_offset;
}
#endif /* ASM_POWERPC_DMA_DIRECT_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2004 IBM
*
* Implements the generic device dma API for powerpc.
* the pci and vio busses
*/
#ifndef _ASM_DMA_MAPPING_H
#define _ASM_DMA_MAPPING_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/cache.h>
/* need struct page definitions */
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
/* Some dma direct funcs must be visible for use in other dma_ops */
extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs);
extern void __dma_nommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs);
extern int dma_nommu_mmap_coherent(struct device *dev,
struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t handle,
size_t size, unsigned long attrs);
#ifdef CONFIG_NOT_COHERENT_CACHE
/*
* DMA-consistent mapping functions for PowerPCs that don't support
* cache snooping. These allocate/free a region of uncached mapped
* memory space for use with DMA devices. Alternatively, you could
* allocate the space "normally" and use the cache management functions
* to ensure it is consistent.
*/
struct device;
extern void __dma_sync(void *vaddr, size_t size, int direction);
extern void __dma_sync_page(struct page *page, unsigned long offset,
size_t size, int direction);
extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
#else /* ! CONFIG_NOT_COHERENT_CACHE */
/*
* Cache coherent cores.
*/
#define __dma_sync(addr, size, rw) ((void)0)
#define __dma_sync_page(pg, off, sz, rw) ((void)0)
#endif /* ! CONFIG_NOT_COHERENT_CACHE */
static inline unsigned long device_to_mask(struct device *dev)
{
if (dev->dma_mask && *dev->dma_mask)
return *dev->dma_mask;
/* Assume devices without mask can take 32 bit addresses */
return 0xfffffffful;
}
/*
* Available generic sets of operations
*/
#ifdef CONFIG_PPC64
extern struct dma_map_ops dma_iommu_ops;
#endif
extern const struct dma_map_ops dma_nommu_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
......@@ -80,31 +15,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return NULL;
}
/*
* get_dma_offset()
*
* Get the dma offset on configurations where the dma address can be determined
* from the physical address by looking at a simple offset. Direct dma and
* swiotlb use this function, but it is typically not used by implementations
* with an iommu.
*/
static inline dma_addr_t get_dma_offset(struct device *dev)
{
if (dev)
return dev->archdata.dma_offset;
return PCI_DRAM_OFFSET;
}
static inline void set_dma_offset(struct device *dev, dma_addr_t off)
{
if (dev)
dev->archdata.dma_offset = off;
}
#define HAVE_ARCH_DMA_SET_MASK 1
extern u64 __dma_get_required_mask(struct device *dev);
#endif /* __KERNEL__ */
#endif /* _ASM_DMA_MAPPING_H */
......@@ -237,6 +237,7 @@ static inline void iommu_del_device(struct device *dev)
}
#endif /* !CONFIG_IOMMU_API */
u64 dma_iommu_get_required_mask(struct device *dev);
#else
static inline void *get_iommu_table_base(struct device *dev)
......@@ -318,5 +319,21 @@ extern void iommu_release_ownership(struct iommu_table *tbl);
extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
#ifdef CONFIG_PPC_CELL_NATIVE
extern bool iommu_fixed_is_weak;
#else
#define iommu_fixed_is_weak false
#endif
extern const struct dma_map_ops dma_iommu_ops;
static inline unsigned long device_to_mask(struct device *dev)
{
if (dev->dma_mask && *dev->dma_mask)
return *dev->dma_mask;
/* Assume devices without mask can take 32 bit addresses */
return 0xfffffffful;
}
#endif /* __KERNEL__ */
#endif /* _ASM_IOMMU_H */
......@@ -47,9 +47,7 @@ struct machdep_calls {
#endif
#endif /* CONFIG_PPC64 */
/* Platform set_dma_mask and dma_get_required_mask overrides */
int (*dma_set_mask)(struct device *dev, u64 dma_mask);
u64 (*dma_get_required_mask)(struct device *dev);
void (*dma_set_mask)(struct device *dev, u64 dma_mask);
int (*probe)(void);
void (*setup_arch)(void); /* Optional, may be NULL */
......
......@@ -19,6 +19,8 @@ struct device_node;
struct pci_controller_ops {
void (*dma_dev_setup)(struct pci_dev *pdev);
void (*dma_bus_setup)(struct pci_bus *bus);
bool (*iommu_bypass_supported)(struct pci_dev *pdev,
u64 mask);
int (*probe_mode)(struct pci_bus *bus);
......@@ -43,9 +45,6 @@ struct pci_controller_ops {
void (*teardown_msi_irqs)(struct pci_dev *pdev);
#endif
int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask);
u64 (*dma_get_required_mask)(struct pci_dev *pdev);
void (*shutdown)(struct pci_controller *hose);
};
......
......@@ -52,10 +52,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
#ifdef CONFIG_PCI
extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops);
extern const struct dma_map_ops *get_pci_dma_ops(void);
#else /* CONFIG_PCI */
#define set_pci_dma_ops(d)
#define get_pci_dma_ops() NULL
#endif
#ifdef CONFIG_PPC64
......
......@@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
extern pgd_t swapper_pg_dir[];
int dma_pfn_limit_to_zone(u64 pfn_limit);
extern void paging_init(void);
/*
......
......@@ -13,12 +13,7 @@
#include <linux/swiotlb.h>
extern const struct dma_map_ops powerpc_swiotlb_dma_ops;
extern unsigned int ppc_swiotlb_enable;
int __init swiotlb_setup_bus_notifier(void);
extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
#ifdef CONFIG_SWIOTLB
void swiotlb_detect_4g(void);
......
......@@ -36,7 +36,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o dma.o misc_$(BITS).o \
udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
......@@ -105,6 +105,7 @@ obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
......
......@@ -6,12 +6,31 @@
* busses using the iommu infrastructure
*/
#include <linux/dma-direct.h>
#include <linux/pci.h>
#include <asm/iommu.h>
/*
* Generic iommu implementation
*/
/*
* The coherent mask may be smaller than the real mask, check if we can
* really use a direct window.
*/
static inline bool dma_iommu_alloc_bypass(struct device *dev)
{
return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
dma_direct_supported(dev, dev->coherent_dma_mask);
}
static inline bool dma_iommu_map_bypass(struct device *dev,
unsigned long attrs)
{
return dev->archdata.iommu_bypass &&
(!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
}
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
......@@ -20,6 +39,8 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
if (dma_iommu_alloc_bypass(dev))
return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
......@@ -29,7 +50,11 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
if (dma_iommu_alloc_bypass(dev))
dma_direct_free(dev, size, vaddr, dma_handle, attrs);
else
iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
......@@ -42,6 +67,9 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
if (dma_iommu_map_bypass(dev, attrs))
return dma_direct_map_page(dev, page, offset, size, direction,
attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, device_to_mask(dev), direction, attrs);
}
......@@ -51,8 +79,9 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
attrs);
if (!dma_iommu_map_bypass(dev, attrs))
iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
direction, attrs);
}
......@@ -60,6 +89,8 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
if (dma_iommu_map_bypass(dev, attrs))
return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
device_to_mask(dev), direction, attrs);
}
......@@ -68,10 +99,20 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
if (!dma_iommu_map_bypass(dev, attrs))
ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
}
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
return phb->controller_ops.iommu_bypass_supported &&
phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
......@@ -83,32 +124,48 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 0;
}
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
mask, tbl->it_offset << tbl->it_page_shift);
return 0;
} else
return 1;
}
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
dev->archdata.iommu_bypass = false;
return 1;
}
static u64 dma_iommu_get_required_mask(struct device *dev)
u64 dma_iommu_get_required_mask(struct device *dev)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
if (!tbl)
return 0;
if (dev_is_pci(dev)) {
u64 bypass_mask = dma_direct_get_required_mask(dev);
if (dma_iommu_bypass_supported(dev, bypass_mask))
return bypass_mask;
}
mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
mask += mask - 1;
return mask;
}
struct dma_map_ops dma_iommu_ops = {
const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
.mmap = dma_nommu_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <asm/machdep.h>
void arch_dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
ppc_md.dma_set_mask(dev, dma_mask);
}
EXPORT_SYMBOL(arch_dma_set_mask);
......@@ -10,101 +10,12 @@
* option) any later version.
*
*/
#include <linux/dma-direct.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pci.h>
#include <asm/machdep.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
unsigned int ppc_swiotlb_enable;
static u64 swiotlb_powerpc_get_required(struct device *dev)
{
u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
end = memblock_end_of_DRAM();
if (max_direct_dma_addr && end > max_direct_dma_addr)
end = max_direct_dma_addr;
end += get_dma_offset(dev);
mask = 1ULL << (fls64(end) - 1);
mask += mask - 1;
return mask;
}
/*
* At the moment, all platforms that use this code only require
* swiotlb to be used if we're operating on HIGHMEM. Since
* we don't ever call anything other than map_sg, unmap_sg,
* map_page, and unmap_page on highmem, use normal dma_ops
* for everything else.
*/
const struct dma_map_ops powerpc_swiotlb_dma_ops = {
.alloc = __dma_nommu_alloc_coherent,
.free = __dma_nommu_free_coherent,
.mmap = dma_nommu_mmap_coherent,
.map_sg = dma_direct_map_sg,
.unmap_sg = dma_direct_unmap_sg,
.dma_supported = swiotlb_dma_supported,
.map_page = dma_direct_map_page,
.unmap_page = dma_direct_unmap_page,
.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
.sync_single_for_device = dma_direct_sync_single_for_device,
.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
.sync_sg_for_device = dma_direct_sync_sg_for_device,
.get_required_mask = swiotlb_powerpc_get_required,
};
void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
{
struct pci_controller *hose;
struct dev_archdata *sd;
hose = pci_bus_to_host(pdev->bus);
sd = &pdev->dev.archdata;
sd->max_direct_dma_addr =
hose->dma_window_base_cur + hose->dma_window_size;
}
static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
struct dev_archdata *sd;
/* We are only intereted in device addition */
if (action != BUS_NOTIFY_ADD_DEVICE)
return 0;
sd = &dev->archdata;
sd->max_direct_dma_addr = 0;
/* May need to bounce if the device can't address all of DRAM */
if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
return NOTIFY_DONE;
}
static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
.notifier_call = ppc_swiotlb_bus_notify,
.priority = 0,
};
int __init swiotlb_setup_bus_notifier(void)
{
bus_register_notifier(&platform_bus_type,
&ppc_swiotlb_plat_bus_notifier);
return 0;
}
void __init swiotlb_detect_4g(void)
{
if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
......
/*
* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
*
* Provide default implementations of the DMA mapping callbacks for
* directly mapped busses.
*/
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <asm/vio.h>
#include <asm/bug.h>
#include <asm/machdep.h>
#include <asm/swiotlb.h>
#include <asm/iommu.h>
/*
* Generic direct DMA implementation
*
* This implementation supports a per-device offset that can be applied if
* the address at which memory is visible to devices is not 0. Platform code
* can set archdata.dma_data to an unsigned long holding the offset. By
* default the offset is PCI_DRAM_OFFSET.
*/
static u64 __maybe_unused get_pfn_limit(struct device *dev)
{
u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
struct dev_archdata __maybe_unused *sd = &dev->archdata;
#ifdef CONFIG_SWIOTLB
if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
#endif
return pfn;
}
static int dma_nommu_dma_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_PPC64
u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
/* Limit fits in the mask, we are good */
if (mask >= limit)
return 1;
#ifdef CONFIG_FSL_SOC
/*
* Freescale gets another chance via ZONE_DMA, however
* that will have to be refined if/when they support iommus
*/
return 1;
#endif
/* Sorry ... */
return 0;
#else
return 1;
#endif
}
#ifndef CONFIG_NOT_COHERENT_CACHE
void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
void *ret;
struct page *page;
int node = dev_to_node(dev);
#ifdef CONFIG_FSL_SOC
u64 pfn = get_pfn_limit(dev);
int zone;
/*
* This code should be OK on other platforms, but we have drivers that
* don't set coherent_dma_mask. As a workaround we just ifdef it. This
* whole routine needs some serious cleanup.
*/
zone = dma_pfn_limit_to_zone(pfn);
if (zone < 0) {
dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
__func__, pfn);
return NULL;
}
switch (zone) {
#ifdef CONFIG_ZONE_DMA
case ZONE_DMA:
flag |= GFP_DMA;
break;
#endif
};
#endif /* CONFIG_FSL_SOC */
page = alloc_pages_node(node, flag, get_order(size));
if (page == NULL)
return NULL;
ret = page_address(page);
memset(ret, 0, size);
*dma_handle = __pa(ret) + get_dma_offset(dev);
return ret;
}
void __dma_nommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
free_pages((unsigned long)vaddr, get_order(size));
}
#endif /* !CONFIG_NOT_COHERENT_CACHE */
static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
struct iommu_table *iommu;
/* The coherent mask may be smaller than the real mask, check if
* we can really use the direct ops
*/
if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
return __dma_nommu_alloc_coherent(dev, size, dma_handle,
flag, attrs);
/* Ok we can't ... do we have an iommu ? If not, fail */
iommu = get_iommu_table_base(dev);
if (!iommu)
return NULL;
/* Try to use the iommu */
return iommu_alloc_coherent(dev, iommu, size, dma_handle,
dev->coherent_dma_mask, flag,
dev_to_node(dev));
}
static void dma_nommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
struct iommu_table *iommu;
/* See comments in dma_nommu_alloc_coherent() */
if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
attrs);
/* Maybe we used an iommu ... */
iommu = get_iommu_table_base(dev);
/* If we hit that we should have never allocated in the first
* place so how come we are freeing ?
*/
if (WARN_ON(!iommu))
return;
iommu_free_coherent(iommu, size, vaddr, dma_handle);
}
int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t handle, size_t size,
unsigned long attrs)
{
unsigned long pfn;
#ifdef CONFIG_NOT_COHERENT_CACHE
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
#else
pfn = page_to_pfn(virt_to_page(cpu_addr));
#endif
return remap_pfn_range(vma, vma->vm_start,
pfn + vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
}
static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction,
unsigned long attrs)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i) {
sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
sg->dma_length = sg->length;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
continue;
__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}
return nents;
}
static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction,
unsigned long attrs)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i)
__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}
static u64 dma_nommu_get_required_mask(struct device *dev)
{
u64 end, mask;
end = memblock_end_of_DRAM() + get_dma_offset(dev);
mask = 1ULL << (fls64(end) - 1);
mask += mask - 1;
return mask;
}
static inline dma_addr_t dma_nommu_map_page(struct device *dev,
struct page *page,
unsigned long offset,
size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
__dma_sync_page(page, offset, size, dir);
return page_to_phys(page) + offset + get_dma_offset(dev);
}
static inline void dma_nommu_unmap_page(struct device *dev,
dma_addr_t dma_address,
size_t size,
enum dma_data_direction direction,
unsigned long attrs)
{
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
__dma_sync(bus_to_virt(dma_address), size, direction);
}
#ifdef CONFIG_NOT_COHERENT_CACHE
static inline void dma_nommu_sync_sg(struct device *dev,
struct scatterlist *sgl, int nents,
enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i)
__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}
static inline void dma_nommu_sync_single(struct device *dev,
dma_addr_t dma_handle, size_t size,
enum dma_data_direction direction)
{
__dma_sync(bus_to_virt(dma_handle), size, direction);
}
#endif
const struct dma_map_ops dma_nommu_ops = {
.alloc = dma_nommu_alloc_coherent,
.free = dma_nommu_free_coherent,
.mmap = dma_nommu_mmap_coherent,
.map_sg = dma_nommu_map_sg,
.unmap_sg = dma_nommu_unmap_sg,
.dma_supported = dma_nommu_dma_supported,
.map_page = dma_nommu_map_page,
.unmap_page = dma_nommu_unmap_page,
.get_required_mask = dma_nommu_get_required_mask,
#ifdef CONFIG_NOT_COHERENT_CACHE
.sync_single_for_cpu = dma_nommu_sync_single,
.sync_single_for_device = dma_nommu_sync_single,
.sync_sg_for_cpu = dma_nommu_sync_sg,
.sync_sg_for_device = dma_nommu_sync_sg,
#endif
};
EXPORT_SYMBOL(dma_nommu_ops);
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
if (!dma_supported(dev, mask)) {
/*
* We need to special case the direct DMA ops which can
* support a fallback for coherent allocations. There
* is no dma_op->set_coherent_mask() so we have to do
* things the hard way:
*/
if (get_dma_ops(dev) != &dma_nommu_ops ||
get_iommu_table_base(dev) == NULL ||
!dma_iommu_dma_supported(dev, mask))
return -EIO;
}
dev->coherent_dma_mask = mask;
return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
int dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
return ppc_md.dma_set_mask(dev, dma_mask);
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
if (phb->controller_ops.dma_set_mask)
return phb->controller_ops.dma_set_mask(pdev, dma_mask);
}
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
return -EIO;
*dev->dma_mask = dma_mask;
return 0;
}
EXPORT_SYMBOL(dma_set_mask);
u64 __dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *dma_ops = get_dma_ops(dev);
if (unlikely(dma_ops == NULL))
return 0;
if (dma_ops->get_required_mask)
return dma_ops->get_required_mask(dev);
return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
}
u64 dma_get_required_mask(struct device *dev)
{
if (ppc_md.dma_get_required_mask)
return ppc_md.dma_get_required_mask(dev);
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
if (phb->controller_ops.dma_get_required_mask)
return phb->controller_ops.dma_get_required_mask(pdev);
}
return __dma_get_required_mask(dev);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
static int __init dma_init(void)
{
#ifdef CONFIG_IBMVIO
dma_debug_add_bus(&vio_bus_type);
#endif
return 0;
}
fs_initcall(dma_init);
......@@ -62,19 +62,13 @@ resource_size_t isa_mem_base;
EXPORT_SYMBOL(isa_mem_base);
static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
static const struct dma_map_ops *pci_dma_ops;
void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
const struct dma_map_ops *get_pci_dma_ops(void)
{
return pci_dma_ops;
}
EXPORT_SYMBOL(get_pci_dma_ops);
/*
* This function should run under locking protection, specifically
* hose_spinlock.
......@@ -972,7 +966,7 @@ static void pcibios_setup_device(struct pci_dev *dev)
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
phb = pci_bus_to_host(dev->bus);
......
......@@ -791,7 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
{
pdev->archdata.dma_mask = DMA_BIT_MASK(32);
pdev->dev.dma_mask = &pdev->archdata.dma_mask;
set_dma_ops(&pdev->dev, &dma_nommu_ops);
}
static __init void print_system_info(void)
......
......@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
......@@ -151,8 +152,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
* Allocate DMA-coherent memory space and return both the kernel remapped
* virtual and bus address for that space.
*/
void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
struct page *page;
struct ppc_vm_region *c;
......@@ -253,7 +254,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
/*
* free a page as defined by the above mapping.
*/
void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
struct ppc_vm_region *c;
......@@ -313,7 +314,7 @@ void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
/*
* make an area consistent.
*/
void __dma_sync(void *vaddr, size_t size, int direction)
static void __dma_sync(void *vaddr, size_t size, int direction)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
......@@ -339,7 +340,6 @@ void __dma_sync(void *vaddr, size_t size, int direction)
break;
}
}
EXPORT_SYMBOL(__dma_sync);
#ifdef CONFIG_HIGHMEM
/*
......@@ -386,28 +386,42 @@ static inline void __dma_sync_page_highmem(struct page *page,
* __dma_sync_page makes memory consistent. identical to __dma_sync, but
* takes a struct page instead of a virtual address
*/
void __dma_sync_page(struct page *page, unsigned long offset,
size_t size, int direction)
static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
{
struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
unsigned offset = paddr & ~PAGE_MASK;
#ifdef CONFIG_HIGHMEM
__dma_sync_page_highmem(page, offset, size, direction);
__dma_sync_page_highmem(page, offset, size, dir);
#else
unsigned long start = (unsigned long)page_address(page) + offset;
__dma_sync((void *)start, size, direction);
__dma_sync((void *)start, size, dir);
#endif
}
EXPORT_SYMBOL(__dma_sync_page);
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
/*
* Return the PFN for a given cpu virtual address returned by
* __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
* Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
*/
unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
dma_addr_t dma_addr)
{
/* This should always be populated, so we don't test every
* level. If that fails, we'll have a nice crash which
* will be as good as a BUG_ON()
*/
unsigned long cpu_addr = (unsigned long)vaddr;
pgd_t *pgd = pgd_offset_k(cpu_addr);
pud_t *pud = pud_offset(pgd, cpu_addr);
pmd_t *pmd = pmd_offset(pud, cpu_addr);
......
......@@ -69,15 +69,12 @@ pte_t *kmap_pte;
EXPORT_SYMBOL(kmap_pte);
pgprot_t kmap_prot;
EXPORT_SYMBOL(kmap_prot);
#define TOP_ZONE ZONE_HIGHMEM
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
vaddr), vaddr), vaddr);
}
#else
#define TOP_ZONE ZONE_NORMAL
#endif
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
......@@ -228,25 +225,6 @@ static int __init mark_nonram_nosave(void)
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES];
/*
* Find the least restrictive zone that is entirely below the
* specified pfn limit. Returns < 0 if no suitable zone is found.
*
* pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
* systems -- the DMA limit can be higher than any possible real pfn.
*/
int dma_pfn_limit_to_zone(u64 pfn_limit)
{
int i;
for (i = TOP_ZONE; i >= 0; i--) {
if (max_zone_pfns[i] <= pfn_limit)
return i;
}
return -EPERM;
}
/*
* paging_init() sets up the page tables - in fact we've already done this.
*/
......
......@@ -34,6 +34,7 @@
#include <asm/ppc4xx.h>
#include <asm/mpic.h>
#include <asm/mmu.h>
#include <asm/swiotlb.h>
#include <linux/pci.h>
#include <linux/i2c.h>
......
......@@ -47,7 +47,7 @@ static int __init warp_probe(void)
if (!of_machine_is_compatible("pika,warp"))
return 0;
/* For __dma_nommu_alloc_coherent */
/* For arch_dma_alloc */
ISA_DMA_THRESHOLD = ~0L;
return 1;
......
......@@ -27,6 +27,7 @@
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/ehv_pic.h>
#include <asm/swiotlb.h>
#include <soc/fsl/qe/qe_ic.h>
#include <linux/of_platform.h>
......@@ -223,7 +224,3 @@ define_machine(corenet_generic) {
};
machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
#ifdef CONFIG_SWIOTLB
machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
#endif
......@@ -202,8 +202,6 @@ static int __init ge_imp3a_probe(void)
machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier);
define_machine(ge_imp3a) {
.name = "GE_IMP3A",
.probe = ge_imp3a_probe,
......
......@@ -57,8 +57,6 @@ static void __init mpc8536_ds_setup_arch(void)
machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
*/
......
......@@ -174,10 +174,6 @@ machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
*/
......
......@@ -367,10 +367,6 @@ machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier);
static void __init mpc85xx_mds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
......
......@@ -55,7 +55,6 @@ static void __init p1010_rdb_setup_arch(void)
}
machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
......
......@@ -548,8 +548,6 @@ static void __init p1022_ds_setup_arch(void)
machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
*/
......
......@@ -128,8 +128,6 @@ static void __init p1022_rdk_setup_arch(void)
machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
*/
......
......@@ -22,6 +22,7 @@
#include <asm/time.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include "smp.h"
......
......@@ -121,7 +121,6 @@ static int __init declare_of_platform_devices(void)
return 0;
}
machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices);
machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
define_machine(mpc86xx_hpcn) {
.name = "MPC86xx HPCN",
......
......@@ -408,6 +408,9 @@ config NOT_COHERENT_CACHE
bool
depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
default n if PPC_47x
default y
......
......@@ -544,9 +544,10 @@ static struct cbe_iommu *cell_iommu_for_node(int nid)
static unsigned long cell_dma_nommu_offset;
static unsigned long dma_iommu_fixed_base;
static bool cell_iommu_enabled;
/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
static int iommu_fixed_is_weak;
bool iommu_fixed_is_weak;
static struct iommu_table *cell_get_iommu_table(struct device *dev)
{
......@@ -568,102 +569,19 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
return &window->table;
}
/* A coherent allocation implies strong ordering */
static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
if (iommu_fixed_is_weak)
return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
size, dma_handle,
device_to_mask(dev), flag,
dev_to_node(dev));
else
return dma_nommu_ops.alloc(dev, size, dma_handle, flag,
attrs);
}
static void dma_fixed_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
if (iommu_fixed_is_weak)
iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
dma_handle);
else
dma_nommu_ops.free(dev, size, vaddr, dma_handle, attrs);
}
static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
unsigned long attrs)
{
if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
return dma_nommu_ops.map_page(dev, page, offset, size,
direction, attrs);
else
return iommu_map_page(dev, cell_get_iommu_table(dev), page,
offset, size, device_to_mask(dev),
direction, attrs);
}
static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
dma_nommu_ops.unmap_page(dev, dma_addr, size, direction,
attrs);
else
iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
direction, attrs);
}
static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction direction,
unsigned long attrs)
{
if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
return dma_nommu_ops.map_sg(dev, sg, nents, direction, attrs);
else
return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
nents, device_to_mask(dev),
direction, attrs);
}
static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction direction,
unsigned long attrs)
{
if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
dma_nommu_ops.unmap_sg(dev, sg, nents, direction, attrs);
else
ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
direction, attrs);
}
static int dma_suported_and_switch(struct device *dev, u64 dma_mask);
static const struct dma_map_ops dma_iommu_fixed_ops = {
.alloc = dma_fixed_alloc_coherent,
.free = dma_fixed_free_coherent,
.map_sg = dma_fixed_map_sg,
.unmap_sg = dma_fixed_unmap_sg,
.dma_supported = dma_suported_and_switch,
.map_page = dma_fixed_map_page,
.unmap_page = dma_fixed_unmap_page,
};
static u64 cell_iommu_get_fixed_address(struct device *dev);
static void cell_dma_dev_setup(struct device *dev)
{
if (get_pci_dma_ops() == &dma_iommu_ops)
if (cell_iommu_enabled) {
u64 addr = cell_iommu_get_fixed_address(dev);
if (addr != OF_BAD_ADDR)
dev->archdata.dma_offset = addr + dma_iommu_fixed_base;
set_iommu_table_base(dev, cell_get_iommu_table(dev));
else if (get_pci_dma_ops() == &dma_nommu_ops)
set_dma_offset(dev, cell_dma_nommu_offset);
else
BUG();
} else {
dev->archdata.dma_offset = cell_dma_nommu_offset;
}
}
static void cell_pci_dma_dev_setup(struct pci_dev *dev)
......@@ -680,11 +598,9 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
if (action != BUS_NOTIFY_ADD_DEVICE)
return 0;
/* We use the PCI DMA ops */
dev->dma_ops = get_pci_dma_ops();
if (cell_iommu_enabled)
dev->dma_ops = &dma_iommu_ops;
cell_dma_dev_setup(dev);
return 0;
}
......@@ -809,7 +725,6 @@ static int __init cell_iommu_init_disabled(void)
unsigned long base = 0, size;
/* When no iommu is present, we use direct DMA ops */
set_pci_dma_ops(&dma_nommu_ops);
/* First make sure all IOC translation is turned off */
cell_disable_iommus();
......@@ -894,7 +809,11 @@ static u64 cell_iommu_get_fixed_address(struct device *dev)
const u32 *ranges = NULL;
int i, len, best, naddr, nsize, pna, range_size;
/* We can be called for platform devices that have no of_node */
np = of_node_get(dev->of_node);
if (!np)
goto out;
while (1) {
naddr = of_n_addr_cells(np);
nsize = of_n_size_cells(np);
......@@ -945,27 +864,10 @@ static u64 cell_iommu_get_fixed_address(struct device *dev)
return dev_addr;
}
static int dma_suported_and_switch(struct device *dev, u64 dma_mask)
static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
{
if (dma_mask == DMA_BIT_MASK(64) &&
cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) {
u64 addr = cell_iommu_get_fixed_address(dev) +
dma_iommu_fixed_base;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
set_dma_ops(dev, &dma_iommu_fixed_ops);
set_dma_offset(dev, addr);
return 1;
}
if (dma_iommu_dma_supported(dev, dma_mask)) {
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
set_dma_ops(dev, get_pci_dma_ops());
cell_dma_dev_setup(dev);
return 1;
}
return 0;
return mask == DMA_BIT_MASK(64) &&
cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
}
static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
......@@ -1119,9 +1021,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
}
dma_iommu_ops.dma_supported = dma_suported_and_switch;
set_pci_dma_ops(&dma_iommu_ops);
cell_pci_controller_ops.iommu_bypass_supported =
cell_pci_iommu_bypass_supported;
return 0;
}
......@@ -1142,7 +1043,7 @@ static int __init setup_iommu_fixed(char *str)
pciep = of_find_node_by_type(NULL, "pcie-endpoint");
if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING;
iommu_fixed_is_weak = true;
of_node_put(pciep);
......@@ -1150,26 +1051,6 @@ static int __init setup_iommu_fixed(char *str)
}
__setup("iommu_fixed=", setup_iommu_fixed);
static u64 cell_dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *dma_ops;
if (!dev->dma_mask)
return 0;
if (!iommu_fixed_disabled &&
cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
return DMA_BIT_MASK(64);
dma_ops = get_dma_ops(dev);
if (dma_ops->get_required_mask)
return dma_ops->get_required_mask(dev);
WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops);
return DMA_BIT_MASK(64);
}
static int __init cell_iommu_init(void)
{
struct device_node *np;
......@@ -1186,10 +1067,9 @@ static int __init cell_iommu_init(void)
/* Setup various callbacks */
cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
goto bail;
goto done;
/* Create an iommu for each /axon node. */
for_each_node_by_name(np, "axon") {
......@@ -1206,10 +1086,10 @@ static int __init cell_iommu_init(void)
continue;
cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
}
done:
/* Setup default PCI iommu ops */
set_pci_dma_ops(&dma_iommu_ops);
cell_iommu_enabled = true;
bail:
/* Register callbacks on OF platform device addition/removal
* to handle linking them to the right DMA operations
......
......@@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
*/
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
dev->dev.dma_ops = &dma_nommu_ops;
dev->dev.dma_ops = NULL;
/*
* Set the coherent DMA mask to prevent the iommu
* being used unnecessarily
......
......@@ -411,55 +411,6 @@ static int pas_machine_check_handler(struct pt_regs *regs)
return !!(srr1 & 0x2);
}
#ifdef CONFIG_PCMCIA
static int pcmcia_notify(struct notifier_block *nb, unsigned long action,
void *data)
{
struct device *dev = data;
struct device *parent;
struct pcmcia_device *pdev = to_pcmcia_dev(dev);
/* We are only intereted in device addition */
if (action != BUS_NOTIFY_ADD_DEVICE)
return 0;
parent = pdev->socket->dev.parent;
/* We know electra_cf devices will always have of_node set, since
* electra_cf is an of_platform driver.
*/
if (!parent->of_node)
return 0;
if (!of_device_is_compatible(parent->of_node, "electra-cf"))
return 0;
/* We use the direct ops for localbus */
dev->dma_ops = &dma_nommu_ops;
return 0;
}
static struct notifier_block pcmcia_notifier = {
.notifier_call = pcmcia_notify,
};
static inline void pasemi_pcmcia_init(void)
{
extern struct bus_type pcmcia_bus_type;
bus_register_notifier(&pcmcia_bus_type, &pcmcia_notifier);
}
#else
static inline void pasemi_pcmcia_init(void)
{
}
#endif
static const struct of_device_id pasemi_bus_ids[] = {
/* Unfortunately needed for legacy firmwares */
{ .type = "localbus", },
......@@ -472,8 +423,6 @@ static const struct of_device_id pasemi_bus_ids[] = {
static int __init pasemi_publish_devices(void)
{
pasemi_pcmcia_init();
/* Publish OF platform devices for SDC and other non-PCI devices */
of_platform_bus_probe(NULL, pasemi_bus_ids, NULL);
......
......@@ -216,7 +216,7 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
* their parent device so drivers shouldn't be doing DMA
* operations directly on these devices.
*/
set_dma_ops(&npe->pdev->dev, NULL);
set_dma_ops(&npe->pdev->dev, &dma_dummy_ops);
}
/*
......
......@@ -1746,7 +1746,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
set_dma_offset(&pdev->dev, pe->tce_bypass_base);
pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
/*
* Note: iommu_add_device() will fail here as
......@@ -1756,31 +1756,6 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}
static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
{
unsigned short vendor = 0;
struct pci_dev *pdev;
if (pe->device_count == 1)
return true;
/* pe->pdev should be set if it's a single device, pe->pbus if not */
if (!pe->pbus)
return true;
list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
if (!vendor) {
vendor = pdev->vendor;
continue;
}
if (pdev->vendor != vendor)
return false;
}
return true;
}
/*
* Reconfigure TVE#0 to be usable as 64-bit DMA space.
*
......@@ -1850,88 +1825,45 @@ static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
return -EIO;
}
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
bypass = (dma_mask >= top);
u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
if (dma_mask >= top)
return true;
}
if (bypass) {
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(&pdev->dev, &dma_nommu_ops);
} else {
/*
* If the device can't set the TCE bypass bit but still wants
* to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
* bypass the 32-bit region and be usable for 64-bit DMAs.
* The device needs to be able to address all of this space.
*/
if (dma_mask >> 32 &&
dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
pnv_pci_ioda_pe_single_vendor(pe) &&
phb->model == PNV_PHB_MODEL_PHB3) {
/* Configure the bypass mode */
rc = pnv_pci_ioda_dma_64bit_bypass(pe);
if (rc)
return rc;
/* 4GB offset bypasses 32-bit space */
set_dma_offset(&pdev->dev, (1ULL << 32));
set_dma_ops(&pdev->dev, &dma_nommu_ops);
} else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
/*
* Fail the request if a DMA mask between 32 and 64 bits
* was requested but couldn't be fulfilled. Ideally we
* would do this for 64-bits but historically we have
* always fallen back to 32-bits.
*/
return -ENOMEM;
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
}
/*
* If the device can't set the TCE bypass bit but still wants
* to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
* bypass the 32-bit region and be usable for 64-bit DMAs.
* The device needs to be able to address all of this space.
*/
if (dma_mask >> 32 &&
dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
/* pe->pdev should be set if it's a single device, pe->pbus if not */
(pe->device_count == 1 || !pe->pbus) &&
phb->model == PNV_PHB_MODEL_PHB3) {
/* Configure the bypass mode */
s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
if (rc)
return rc;
/* 4GB offset bypasses 32-bit space */
pdev->dev.archdata.dma_offset = (1ULL << 32);
return true;
}
*pdev->dev.dma_mask = dma_mask;
/* Update peer npu devices */
pnv_npu_try_dma_set_bypass(pdev, bypass);
return 0;
}
static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
u64 end, mask;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return 0;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (!pe->tce_bypass_enabled)
return __dma_get_required_mask(&pdev->dev);
end = pe->tce_bypass_base + memblock_end_of_DRAM();
mask = 1ULL << (fls64(end) - 1);
mask += mask - 1;
return mask;
return false;
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
......@@ -1940,7 +1872,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
dev->dev.archdata.dma_offset = pe->tce_bypass_base;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
......@@ -3659,6 +3591,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
.iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
......@@ -3666,19 +3599,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.window_alignment = pnv_pci_window_alignment,
.setup_bridge = pnv_pci_setup_bridge,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.dma_set_mask = pnv_pci_ioda_dma_set_mask,
.dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
.shutdown = pnv_pci_ioda_shutdown,
};
static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
{
dev_err_once(&npdev->dev,
"%s operation unsupported for NVLink devices\n",
__func__);
return -EPERM;
}
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.setup_msi_irqs = pnv_setup_msi_irqs,
......@@ -3686,7 +3609,6 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
.disable_device = pnv_npu_disable_device,
};
......
......@@ -978,7 +978,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
* pdn: the parent pe node with the ibm,dma_window property
* Future: also check if we can remap the base window for our base page size
*
* returns the dma offset for use by dma_set_mask
* returns the dma offset for use by the direct mapped DMA code.
*/
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
......@@ -1198,87 +1198,37 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
iommu_add_device(pci->table_group, &dev->dev);
}
static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
bool ddw_enabled = false;
struct device_node *pdn, *dn;
struct pci_dev *pdev;
struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
const __be32 *dma_window = NULL;
u64 dma_offset;
if (!dev->dma_mask)
return -EIO;
if (!dev_is_pci(dev))
goto check_mask;
pdev = to_pci_dev(dev);
/* only attempt to use a new window if 64-bit DMA is requested */
if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
dn = pci_device_to_OF_node(pdev);
dev_dbg(dev, "node is %pOF\n", dn);
if (dma_mask < DMA_BIT_MASK(64))
return false;
/*
* the device tree might contain the dma-window properties
* per-device and not necessarily for the bus. So we need to
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
break;
}
if (pdn && PCI_DN(pdn)) {
dma_offset = enable_ddw(pdev, pdn);
if (dma_offset != 0) {
dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
set_dma_offset(dev, dma_offset);
set_dma_ops(dev, &dma_nommu_ops);
ddw_enabled = true;
}
}
}
dev_dbg(&pdev->dev, "node is %pOF\n", dn);
/* fall back on iommu ops */
if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
dev_info(dev, "Restoring 32-bit DMA via iommu\n");
set_dma_ops(dev, &dma_iommu_ops);
/*
* the device tree might contain the dma-window properties
* per-device and not necessarily for the bus. So we need to
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
break;
}
check_mask:
if (!dma_supported(dev, dma_mask))
return -EIO;
*dev->dma_mask = dma_mask;
return 0;
}
static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
{
if (!dev->dma_mask)
return 0;
if (!disable_ddw && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
struct device_node *dn;
dn = pci_device_to_OF_node(pdev);
/* search upwards for ibm,dma-window */
for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
dn = dn->parent)
if (of_get_property(dn, "ibm,dma-window", NULL))
break;
/* if there is a ibm,ddw-applicable property require 64 bits */
if (dn && PCI_DN(dn) &&
of_get_property(dn, "ibm,ddw-applicable", NULL))
return DMA_BIT_MASK(64);
if (pdn && PCI_DN(pdn)) {
pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
if (pdev->dev.archdata.dma_offset)
return true;
}
return dma_iommu_ops.get_required_mask(dev);
return false;
}
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
......@@ -1373,8 +1323,9 @@ void iommu_init_early_pSeries(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
if (!disable_ddw)
pseries_pci_controller_ops.iommu_bypass_supported =
iommu_bypass_supported_pSeriesLP;
} else {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
......
......@@ -492,7 +492,9 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
return NULL;
}
ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs);
ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
if (unlikely(ret == NULL)) {
vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
atomic_inc(&viodev->cmo.allocs_failed);
......@@ -507,8 +509,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
{
struct vio_dev *viodev = to_vio_dev(dev);
dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs);
iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
}
......@@ -518,22 +519,22 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
struct iommu_table *tbl = get_iommu_table_base(dev);
dma_addr_t ret = DMA_MAPPING_ERROR;
tbl = get_iommu_table_base(dev);
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
atomic_inc(&viodev->cmo.allocs_failed);
return ret;
}
ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
if (unlikely(dma_mapping_error(dev, ret))) {
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
atomic_inc(&viodev->cmo.allocs_failed);
}
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
goto out_fail;
ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev),
direction, attrs);
if (unlikely(ret == DMA_MAPPING_ERROR))
goto out_deallocate;
return ret;
out_deallocate:
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
out_fail:
atomic_inc(&viodev->cmo.allocs_failed);
return DMA_MAPPING_ERROR;
}
static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
......@@ -542,11 +543,9 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
tbl = get_iommu_table_base(dev);
dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
struct iommu_table *tbl = get_iommu_table_base(dev);
iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
}
......@@ -555,34 +554,32 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
struct iommu_table *tbl = get_iommu_table_base(dev);
struct scatterlist *sgl;
int ret, count;
size_t alloc_size = 0;
tbl = get_iommu_table_base(dev);
for_each_sg(sglist, sgl, nelems, count)
alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
if (vio_cmo_alloc(viodev, alloc_size)) {
atomic_inc(&viodev->cmo.allocs_failed);
return 0;
}
ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
if (unlikely(!ret)) {
vio_cmo_dealloc(viodev, alloc_size);
atomic_inc(&viodev->cmo.allocs_failed);
return ret;
}
if (vio_cmo_alloc(viodev, alloc_size))
goto out_fail;
ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev),
direction, attrs);
if (unlikely(!ret))
goto out_deallocate;
for_each_sg(sglist, sgl, ret, count)
alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
if (alloc_size)
vio_cmo_dealloc(viodev, alloc_size);
return ret;
out_deallocate:
vio_cmo_dealloc(viodev, alloc_size);
out_fail:
atomic_inc(&viodev->cmo.allocs_failed);
return 0;
}
static void vio_dma_iommu_unmap_sg(struct device *dev,
......@@ -591,40 +588,27 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
struct iommu_table *tbl = get_iommu_table_base(dev);
struct scatterlist *sgl;
size_t alloc_size = 0;
int count;
tbl = get_iommu_table_base(dev);
for_each_sg(sglist, sgl, nelems, count)
alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
vio_cmo_dealloc(viodev, alloc_size);
}
static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
{
return dma_iommu_ops.dma_supported(dev, mask);
}
static u64 vio_dma_get_required_mask(struct device *dev)
{
return dma_iommu_ops.get_required_mask(dev);
}
static const struct dma_map_ops vio_dma_mapping_ops = {
.alloc = vio_dma_iommu_alloc_coherent,
.free = vio_dma_iommu_free_coherent,
.mmap = dma_nommu_mmap_coherent,
.map_sg = vio_dma_iommu_map_sg,
.unmap_sg = vio_dma_iommu_unmap_sg,
.map_page = vio_dma_iommu_map_page,
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = vio_dma_iommu_dma_supported,
.get_required_mask = vio_dma_get_required_mask,
.dma_supported = dma_iommu_dma_supported,
.get_required_mask = dma_iommu_get_required_mask,
};
/**
......@@ -1715,3 +1699,10 @@ int vio_disable_interrupts(struct vio_dev *dev)
}
EXPORT_SYMBOL(vio_disable_interrupts);
#endif /* CONFIG_PPC_PSERIES */
static int __init vio_init(void)
{
dma_debug_add_bus(&vio_bus_type);
return 0;
}
fs_initcall(vio_init);
......@@ -360,13 +360,6 @@ static void iommu_table_dart_setup(void)
set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}
static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
if (dart_is_u4)
set_dma_offset(&dev->dev, DART_U4_BYPASS_BASE);
set_iommu_table_base(&dev->dev, &iommu_table_dart);
}
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
if (!iommu_table_dart_inited) {
......@@ -390,27 +383,18 @@ static bool dart_device_on_pcie(struct device *dev)
return false;
}
static int dart_dma_set_mask(struct device *dev, u64 dma_mask)
static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
return -EIO;
/* U4 supports a DART bypass, we use it for 64-bit capable
* devices to improve performances. However, that only works
* for devices connected to U4 own PCIe interface, not bridged
* through hypertransport. We need the device to support at
* least 40 bits of addresses.
*/
if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) {
dev_info(dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(dev, &dma_nommu_ops);
} else {
dev_info(dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(dev, &dma_iommu_ops);
}
if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
set_iommu_table_base(&dev->dev, &iommu_table_dart);
}
*dev->dma_mask = dma_mask;
return 0;
static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
{
return dart_is_u4 &&
dart_device_on_pcie(&dev->dev) &&
mask >= DMA_BIT_MASK(40);
}
void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
......@@ -428,26 +412,20 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
/* Initialize the DART HW */
if (dart_init(dn) != 0)
goto bail;
/* Setup bypass if supported */
if (dart_is_u4)
ppc_md.dma_set_mask = dart_dma_set_mask;
return;
/*
* U4 supports a DART bypass, we use it for 64-bit capable devices to
* improve performance. However, that only works for devices connected
* to the U4 own PCIe interface, not bridged through hypertransport.
* We need the device to support at least 40 bits of addresses.
*/
controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;
/* Setup pci_dma ops */
set_pci_dma_ops(&dma_iommu_ops);
return;
bail:
/* If init failed, use direct iommu and null setup functions */
controller_ops->dma_dev_setup = NULL;
controller_ops->dma_bus_setup = NULL;
/* Setup pci_dma ops */
set_pci_dma_ops(&dma_nommu_ops);
}
#ifdef CONFIG_PM
......
......@@ -40,6 +40,7 @@
#include <asm/mpc85xx.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
#include <asm/swiotlb.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
......@@ -114,33 +115,33 @@ static struct pci_ops fsl_indirect_pcie_ops =
static u64 pci64_dma_offset;
#ifdef CONFIG_SWIOTLB
static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
pdev->dev.bus_dma_mask =
hose->dma_window_base_cur + hose->dma_window_size;
}
static void setup_swiotlb_ops(struct pci_controller *hose)
{
if (ppc_swiotlb_enable) {
if (ppc_swiotlb_enable)
hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
set_pci_dma_ops(&powerpc_swiotlb_dma_ops);
}
}
#else
static inline void setup_swiotlb_ops(struct pci_controller *hose) {}
#endif
static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
{
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
return -EIO;
/*
* Fix up PCI devices that are able to DMA to the large inbound
* mapping that allows addressing any RAM address from across PCI.
*/
if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
set_dma_ops(dev, &dma_nommu_ops);
set_dma_offset(dev, pci64_dma_offset);
dev->bus_dma_mask = 0;
dev->archdata.dma_offset = pci64_dma_offset;
}
*dev->dma_mask = dma_mask;
return 0;
}
static int setup_one_atmu(struct ccsr_pci __iomem *pci,
......
......@@ -43,8 +43,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
return false;
}
set_dma_ops(&dev->dev, &dma_nommu_ops);
set_dma_offset(&dev->dev, PAGE_OFFSET);
dev->dev.archdata.dma_offset = PAGE_OFFSET;
/*
* Allocate a context to do cxl things too. If we eventually do real
......
......@@ -1716,6 +1716,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = -ENODEV;
goto out;
}
dma_set_mask(&mac->dma_pdev->dev, DMA_BIT_MASK(64));
mac->iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
if (!mac->iob_pdev) {
......
......@@ -60,9 +60,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target);
extern int
swiotlb_dma_supported(struct device *hwdev, u64 mask);
#ifdef CONFIG_SWIOTLB
extern enum swiotlb_force swiotlb_force;
extern phys_addr_t io_tlb_start, io_tlb_end;
......
......@@ -16,6 +16,9 @@ config ARCH_DMA_ADDR_T_64BIT
config ARCH_HAS_DMA_COHERENCE_H
bool
config ARCH_HAS_DMA_SET_MASK
bool
config HAVE_GENERIC_DMA_COHERENT
bool
......
......@@ -132,8 +132,7 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
goto again;
}
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) {
if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
goto again;
}
......
......@@ -207,7 +207,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
}
EXPORT_SYMBOL(dma_mmap_attrs);
#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
static u64 dma_default_get_required_mask(struct device *dev)
{
u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
......@@ -238,7 +237,6 @@ u64 dma_get_required_mask(struct device *dev)
return dma_default_get_required_mask(dev);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif
#ifndef arch_dma_alloc_attrs
#define arch_dma_alloc_attrs(dev) (true)
......@@ -318,18 +316,23 @@ int dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_supported);
#ifndef HAVE_ARCH_DMA_SET_MASK
#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
void arch_dma_set_mask(struct device *dev, u64 mask);
#else
#define arch_dma_set_mask(dev, mask) do { } while (0)
#endif
int dma_set_mask(struct device *dev, u64 mask)
{
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
arch_dma_set_mask(dev, mask);
dma_check_mask(dev, mask);
*dev->dma_mask = mask;
return 0;
}
EXPORT_SYMBOL(dma_set_mask);
#endif
#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
......
......@@ -648,15 +648,3 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
return true;
}
/*
* Return whether the given device DMA address mask can be supported
* properly. For example, if your device can only drive the low 24-bits
* during bus mastering, then you would pass 0x00ffffff as the mask to
* this function.
*/
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment