Commit c40785ad authored by Benjamin Herrenschmidt's avatar Benjamin Herrenschmidt Committed by Michael Ellerman

powerpc/dart: Use a cacheable DART

Instead of punching a hole in the linear mapping, just use normal
cacheable memory, and apply the flush sequence documented in the
CPC925 (aka U3) user manual.

This allows us to remove quite a bit of code related to the early
allocation of the DART and the hole in the linear mapping. We can
also get rid of the copy of the DART for suspend/resume as the
original memory can just be saved/restored now, as long as we
properly sync the caches.
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
[mpe: Integrate dart_init() fix to return ENODEV when DART disabled]
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent de4cf3de
...@@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void); ...@@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void);
extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
extern void iommu_init_early_pasemi(void); extern void iommu_init_early_pasemi(void);
extern void alloc_dart_table(void);
#if defined(CONFIG_PPC64) && defined(CONFIG_PM) #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
static inline void iommu_save(void) static inline void iommu_save(void)
{ {
......
...@@ -87,10 +87,6 @@ ...@@ -87,10 +87,6 @@
* *
*/ */
#ifdef CONFIG_U3_DART
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */
static unsigned long _SDR1; static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs); EXPORT_SYMBOL_GPL(mmu_psize_defs);
...@@ -846,34 +842,6 @@ static void __init htab_initialize(void) ...@@ -846,34 +842,6 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot); base, size, prot);
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
* in such a way that it will not cross two memblock regions and
* will fit within a single 16Mb page.
* The DART space is assumed to be a full 16Mb region even if
* we only use 2Mb of that space. We will use more of it later
* for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
unsigned long dart_table_end = dart_tablebase + 16 * MB;
if (base != dart_tablebase)
BUG_ON(htab_bolt_mapping(base, dart_tablebase,
__pa(base), prot,
mmu_linear_psize,
mmu_kernel_ssize));
if ((base + size) > dart_table_end)
BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
base + size,
__pa(dart_table_end),
prot,
mmu_linear_psize,
mmu_kernel_ssize));
continue;
}
#endif /* CONFIG_U3_DART */
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize)); prot, mmu_linear_psize, mmu_kernel_ssize));
} }
......
...@@ -305,13 +305,6 @@ static int __init maple_probe(void) ...@@ -305,13 +305,6 @@ static int __init maple_probe(void)
if (!of_flat_dt_is_compatible(root, "Momentum,Maple") && if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
!of_flat_dt_is_compatible(root, "Momentum,Apache")) !of_flat_dt_is_compatible(root, "Momentum,Apache"))
return 0; return 0;
/*
* On U3, the DART (iommu) must be allocated now since it
* has an impact on htab_initialize (due to the large page it
* occupies having to be broken up so the DART itself is not
* part of the cacheable linear mapping
*/
alloc_dart_table();
hpte_init_native(); hpte_init_native();
pm_power_off = maple_power_off; pm_power_off = maple_power_off;
......
...@@ -607,14 +607,6 @@ static int __init pmac_probe(void) ...@@ -607,14 +607,6 @@ static int __init pmac_probe(void)
return 0; return 0;
#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC64
/*
* On U3, the DART (iommu) must be allocated now since it
* has an impact on htab_initialize (due to the large page it
* occupies having to be broken up so the DART itself is not
* part of the cacheable linear mapping
*/
alloc_dart_table();
hpte_init_native(); hpte_init_native();
#endif #endif
......
...@@ -48,16 +48,10 @@ ...@@ -48,16 +48,10 @@
#include "dart.h" #include "dart.h"
/* Physical base address and size of the DART table */ /* DART table address and size */
unsigned long dart_tablebase; /* exported to htab_initialize */ static u32 *dart_tablebase;
static unsigned long dart_tablesize; static unsigned long dart_tablesize;
/* Virtual base address of the DART table */
static u32 *dart_vbase;
#ifdef CONFIG_PM
static u32 *dart_copy;
#endif
/* Mapped base address for the dart */ /* Mapped base address for the dart */
static unsigned int __iomem *dart; static unsigned int __iomem *dart;
...@@ -151,6 +145,34 @@ static inline void dart_tlb_invalidate_one(unsigned long bus_rpn) ...@@ -151,6 +145,34 @@ static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
spin_unlock_irqrestore(&invalidate_lock, flags); spin_unlock_irqrestore(&invalidate_lock, flags);
} }
/*
 * dart_cache_sync - push modified DART table entries out to memory
 * @base:  first DART entry that was modified
 * @count: number of entries to flush
 *
 * The DART hardware is not cache coherent with the CPU, so after
 * updating table entries in (cacheable) memory we must force the data
 * past the CPU caches to a point where the DART will observe it.
 */
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_inval_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 *
	 * The dcbf/sync pair pushes the last line out; the dummy load
	 * from 'end' (into tmp) followed by isync forces completion
	 * before we proceed. Do not reorder these instructions.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}
static void dart_flush(struct iommu_table *tbl) static void dart_flush(struct iommu_table *tbl)
{ {
mb(); mb();
...@@ -165,13 +187,13 @@ static int dart_build(struct iommu_table *tbl, long index, ...@@ -165,13 +187,13 @@ static int dart_build(struct iommu_table *tbl, long index,
enum dma_data_direction direction, enum dma_data_direction direction,
struct dma_attrs *attrs) struct dma_attrs *attrs)
{ {
unsigned int *dp; unsigned int *dp, *orig_dp;
unsigned int rpn; unsigned int rpn;
long l; long l;
DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
dp = ((unsigned int*)tbl->it_base) + index; orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
/* On U3, all memory is contiguous, so we can move this /* On U3, all memory is contiguous, so we can move this
* out of the loop. * out of the loop.
...@@ -184,11 +206,7 @@ static int dart_build(struct iommu_table *tbl, long index, ...@@ -184,11 +206,7 @@ static int dart_build(struct iommu_table *tbl, long index,
uaddr += DART_PAGE_SIZE; uaddr += DART_PAGE_SIZE;
} }
dart_cache_sync(orig_dp, npages);
/* make sure all updates have reached memory */
mb();
in_be32((unsigned __iomem *)dp);
mb();
if (dart_is_u4) { if (dart_is_u4) {
rpn = index; rpn = index;
...@@ -203,7 +221,8 @@ static int dart_build(struct iommu_table *tbl, long index, ...@@ -203,7 +221,8 @@ static int dart_build(struct iommu_table *tbl, long index,
static void dart_free(struct iommu_table *tbl, long index, long npages) static void dart_free(struct iommu_table *tbl, long index, long npages)
{ {
unsigned int *dp; unsigned int *dp, *orig_dp;
long orig_npages = npages;
/* We don't worry about flushing the TLB cache. The only drawback of /* We don't worry about flushing the TLB cache. The only drawback of
* not doing it is that we won't catch buggy device drivers doing * not doing it is that we won't catch buggy device drivers doing
...@@ -212,34 +231,30 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) ...@@ -212,34 +231,30 @@ static void dart_free(struct iommu_table *tbl, long index, long npages)
DBG("dart: free at: %lx, %lx\n", index, npages); DBG("dart: free at: %lx, %lx\n", index, npages);
dp = ((unsigned int *)tbl->it_base) + index; orig_dp = dp = ((unsigned int *)tbl->it_base) + index;
while (npages--) while (npages--)
*(dp++) = dart_emptyval; *(dp++) = dart_emptyval;
}
dart_cache_sync(orig_dp, orig_npages);
}
static int __init dart_init(struct device_node *dart_node) static void allocate_dart(void)
{ {
unsigned int i; unsigned long tmp;
unsigned long tmp, base, size;
struct resource r;
if (dart_tablebase == 0 || dart_tablesize == 0) { /* 512 pages (2MB) is max DART tablesize. */
printk(KERN_INFO "DART: table not allocated, using " dart_tablesize = 1UL << 21;
"direct DMA\n");
return -ENODEV;
}
if (of_address_to_resource(dart_node, 0, &r))
panic("DART: can't get register base ! ");
/* Make sure nothing from the DART range remains in the CPU cache /*
* from a previous mapping that existed before the kernel took * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
* over * will blow up an entire large page anyway in the kernel mapping.
*/ */
flush_dcache_phys_range(dart_tablebase, dart_tablebase = __va(memblock_alloc_base(1UL<<24,
dart_tablebase + dart_tablesize); 1UL<<24, 0x80000000L));
/* There is no point scanning the DART space for leaks*/
kmemleak_no_scan((void *)dart_tablebase);
/* Allocate a spare page to map all invalid DART pages. We need to do /* Allocate a spare page to map all invalid DART pages. We need to do
* that to work around what looks like a problem with the HT bridge * that to work around what looks like a problem with the HT bridge
...@@ -249,20 +264,51 @@ static int __init dart_init(struct device_node *dart_node) ...@@ -249,20 +264,51 @@ static int __init dart_init(struct device_node *dart_node)
dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
DARTMAP_RPNMASK); DARTMAP_RPNMASK);
printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}
static int __init dart_init(struct device_node *dart_node)
{
unsigned int i;
unsigned long base, size;
struct resource r;
/* IOMMU disabled by the user ? bail out */
if (iommu_is_off)
return -ENODEV;
/*
* Only use the DART if the machine has more than 1GB of RAM
* or if requested with iommu=on on cmdline.
*
* 1GB of RAM is picked as limit because some default devices
* (i.e. Airport Extreme) have 30 bit address range limits.
*/
if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
return -ENODEV;
/* Get DART registers */
if (of_address_to_resource(dart_node, 0, &r))
panic("DART: can't get register base ! ");
/* Map in DART registers */ /* Map in DART registers */
dart = ioremap(r.start, resource_size(&r)); dart = ioremap(r.start, resource_size(&r));
if (dart == NULL) if (dart == NULL)
panic("DART: Cannot map registers!"); panic("DART: Cannot map registers!");
/* Map in DART table */ /* Allocate the DART and dummy page */
dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize); allocate_dart();
/* Fill initial table */ /* Fill initial table */
for (i = 0; i < dart_tablesize/4; i++) for (i = 0; i < dart_tablesize/4; i++)
dart_vbase[i] = dart_emptyval; dart_tablebase[i] = dart_emptyval;
/* Push to memory */
dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
/* Initialize DART with table base and enable it. */ /* Initialize DART with table base and enable it. */
base = dart_tablebase >> DART_PAGE_SHIFT; base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
size = dart_tablesize >> DART_PAGE_SHIFT; size = dart_tablesize >> DART_PAGE_SHIFT;
if (dart_is_u4) { if (dart_is_u4) {
size &= DART_SIZE_U4_SIZE_MASK; size &= DART_SIZE_U4_SIZE_MASK;
...@@ -301,7 +347,7 @@ static void iommu_table_dart_setup(void) ...@@ -301,7 +347,7 @@ static void iommu_table_dart_setup(void)
iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K; iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
/* Initialize the common IOMMU code */ /* Initialize the common IOMMU code */
iommu_table_dart.it_base = (unsigned long)dart_vbase; iommu_table_dart.it_base = (unsigned long)dart_tablebase;
iommu_table_dart.it_index = 0; iommu_table_dart.it_index = 0;
iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_blocksize = 1;
iommu_table_dart.it_ops = &iommu_dart_ops; iommu_table_dart.it_ops = &iommu_dart_ops;
...@@ -404,75 +450,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) ...@@ -404,75 +450,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
} }
#ifdef CONFIG_PM #ifdef CONFIG_PM
/*
 * Snapshot the live DART table into the pre-allocated shadow copy so
 * it can be replayed after resume (the table itself lives outside the
 * normally-saved linear mapping).
 */
static void iommu_dart_save(void)
{
	memcpy(dart_copy, dart_vbase, 1 << 21);	/* full 2MB table */
}
static void iommu_dart_restore(void) static void iommu_dart_restore(void)
{ {
memcpy(dart_vbase, dart_copy, 2*1024*1024); dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
dart_tlb_invalidate_all(); dart_tlb_invalidate_all();
} }
static int __init iommu_init_late_dart(void) static int __init iommu_init_late_dart(void)
{ {
unsigned long tbasepfn;
struct page *p;
/* if no dart table exists then we won't need to save it
* and the area has also not been reserved */
if (!dart_tablebase) if (!dart_tablebase)
return 0; return 0;
tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT;
register_nosave_region_late(tbasepfn,
tbasepfn + ((1<<24) >> PAGE_SHIFT));
/* For suspend we need to copy the dart contents because
* it is not part of the regular mapping (see above) and
* thus not saved automatically. The memory for this copy
* must be allocated early because we need 2 MB. */
p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT);
BUG_ON(!p);
dart_copy = page_address(p);
ppc_md.iommu_save = iommu_dart_save;
ppc_md.iommu_restore = iommu_dart_restore; ppc_md.iommu_restore = iommu_dart_restore;
return 0; return 0;
} }
late_initcall(iommu_init_late_dart); late_initcall(iommu_init_late_dart);
#endif #endif /* CONFIG_PM */
/*
 * Reserve physical memory for the DART early in boot, before
 * htab_initialize() runs, so the linear mapping can leave a hole
 * around it.
 *
 * The DART is only worth reserving when the machine has more than
 * 1GB of RAM or when iommu=on was given on the command line: some
 * default devices (e.g. Airport Extreme) can only address 30 bits.
 */
void __init alloc_dart_table(void)
{
	/* Bail out when the IOMMU is disabled or not needed */
	if (iommu_is_off ||
	    (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull))
		return;

	/* Maximum DART table size is 512 pages (2MB) */
	dart_tablesize = 1UL << 21;

	/*
	 * Allocate a full, 16MB-aligned 16MB chunk below 2GB: the
	 * kernel mapping loses an entire large page for it anyway.
	 */
	dart_tablebase = (unsigned long)
		__va(memblock_alloc_base(1UL << 24, 1UL << 24, 0x80000000L));

	/*
	 * The DART space is later unmapped from the kernel linear
	 * mapping; touching it during a kmemleak scan would fault.
	 */
	kmemleak_no_scan((void *)dart_tablebase);

	printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment