Commit 03bb2d65 authored by Christophe Leroy's avatar Christophe Leroy Committed by Scott Wood

powerpc: get hugetlbpage handling more generic

Today there are two implementations of hugetlbpages which are managed
by exclusive #ifdefs:
* FSL_BOOKE: several directory entries point to the same single hugepage
* BOOK3S: one upper level directory entry points to a table of hugepages

In preparation of implementation of hugepage support on the 8xx, we
need a mix of the two above solutions, because the 8xx needs both cases
depending on the size of pages:
* In 4k page size mode, each PGD entry covers a 4M bytes area. It means
that 2 PGD entries will be necessary to cover an 8M hugepage while a
single PGD entry will cover 8x 512k hugepages.
* In 16k page size mode, each PGD entry covers a 64M bytes area. It means
that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
hugepages will be covered by one PGD entry.

This patch:
* removes #ifdefs in favor of if/else based on the range sizes
* merges the two huge_pte_alloc() functions as they are pretty similar
* merges the two hugetlbpage_init() functions as they are pretty similar
Signed-off-by: default avatarChristophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> (v3)
Signed-off-by: default avatarScott Wood <oss@buserror.net>
parent 9b081e10
...@@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
pte_t *new; pte_t *new;
#ifdef CONFIG_PPC_FSL_BOOK3E
int i; int i;
int num_hugepd = 1 << (pshift - pdshift); int num_hugepd;
if (pshift >= pdshift) {
cachep = hugepte_cache; cachep = hugepte_cache;
#else num_hugepd = 1 << (pshift - pdshift);
} else {
cachep = PGT_CACHE(pdshift - pshift); cachep = PGT_CACHE(pdshift - pshift);
#endif num_hugepd = 1;
}
new = kmem_cache_zalloc(cachep, GFP_KERNEL); new = kmem_cache_zalloc(cachep, GFP_KERNEL);
...@@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
smp_wmb(); smp_wmb();
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
/* /*
* We have multiple higher-level entries that point to the same * We have multiple higher-level entries that point to the same
* actual pte location. Fill in each as we go and backtrack on error. * actual pte location. Fill in each as we go and backtrack on error.
...@@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp))) if (unlikely(!hugepd_none(*hpdp)))
break; break;
else else
#ifdef CONFIG_PPC_BOOK3S_64
hpdp->pd = __pa(new) |
(shift_to_mmu_psize(pshift) << 2);
#else
/* We use the old format for PPC_FSL_BOOK3E */ /* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
} }
/* If we bailed from the for loop early, an error occurred, clean up */ /* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) { if (i < num_hugepd) {
...@@ -109,17 +116,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -109,17 +116,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
hpdp->pd = 0; hpdp->pd = 0;
kmem_cache_free(cachep, new); kmem_cache_free(cachep, new);
} }
#else
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
else {
#ifdef CONFIG_PPC_BOOK3S_64
hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
}
#endif
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return 0; return 0;
} }
...@@ -136,7 +132,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -136,7 +132,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT #define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif #endif
#ifdef CONFIG_PPC_BOOK3S_64
/* /*
* At this point we do the placement change only for BOOK3S 64. This would * At this point we do the placement change only for BOOK3S 64. This would
* possibly work on other subarchs. * possibly work on other subarchs.
...@@ -153,6 +148,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -153,6 +148,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
addr &= ~(sz-1); addr &= ~(sz-1);
pg = pgd_offset(mm, addr); pg = pgd_offset(mm, addr);
#ifdef CONFIG_PPC_BOOK3S_64
if (pshift == PGDIR_SHIFT) if (pshift == PGDIR_SHIFT)
/* 16GB huge page */ /* 16GB huge page */
return (pte_t *) pg; return (pte_t *) pg;
...@@ -178,32 +174,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -178,32 +174,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm; hpdp = (hugepd_t *)pm;
} }
} }
if (!hpdp)
return NULL;
BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;
return hugepte_offset(*hpdp, addr, pdshift);
}
#else #else
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
hugepd_t *hpdp = NULL;
unsigned pshift = __ffs(sz);
unsigned pdshift = PGDIR_SHIFT;
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
if (pshift >= HUGEPD_PGD_SHIFT) { if (pshift >= HUGEPD_PGD_SHIFT) {
hpdp = (hugepd_t *)pg; hpdp = (hugepd_t *)pg;
} else { } else {
...@@ -217,7 +188,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -217,7 +188,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm; hpdp = (hugepd_t *)pm;
} }
} }
#endif
if (!hpdp) if (!hpdp)
return NULL; return NULL;
...@@ -228,7 +199,6 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -228,7 +199,6 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(*hpdp, addr, pdshift); return hugepte_offset(*hpdp, addr, pdshift);
} }
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E #ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early /* Build list of addresses of gigantic pages. This function is used in early
...@@ -310,7 +280,11 @@ static int __init do_gpage_early_setup(char *param, char *val, ...@@ -310,7 +280,11 @@ static int __init do_gpage_early_setup(char *param, char *val,
npages = 0; npages = 0;
if (npages > MAX_NUMBER_GPAGES) { if (npages > MAX_NUMBER_GPAGES) {
pr_warn("MMU: %lu pages requested for page " pr_warn("MMU: %lu pages requested for page "
#ifdef CONFIG_PHYS_ADDR_T_64BIT
"size %llu KB, limiting to " "size %llu KB, limiting to "
#else
"size %u KB, limiting to "
#endif
__stringify(MAX_NUMBER_GPAGES) "\n", __stringify(MAX_NUMBER_GPAGES) "\n",
npages, size / 1024); npages, size / 1024);
npages = MAX_NUMBER_GPAGES; npages = MAX_NUMBER_GPAGES;
...@@ -442,6 +416,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) ...@@ -442,6 +416,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
} }
put_cpu_var(hugepd_freelist_cur); put_cpu_var(hugepd_freelist_cur);
} }
#else
static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif #endif
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
...@@ -453,13 +429,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif ...@@ -453,13 +429,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned long pdmask = ~((1UL << pdshift) - 1);
unsigned int num_hugepd = 1; unsigned int num_hugepd = 1;
unsigned int shift = hugepd_shift(*hpdp);
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Note: On fsl the hpdp may be the first of several */ /* Note: On fsl the hpdp may be the first of several */
num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); if (shift > pdshift)
#else num_hugepd = 1 << (shift - pdshift);
unsigned int shift = hugepd_shift(*hpdp);
#endif
start &= pdmask; start &= pdmask;
if (start < floor) if (start < floor)
...@@ -475,11 +449,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif ...@@ -475,11 +449,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++) for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0; hpdp->pd = 0;
#ifdef CONFIG_PPC_FSL_BOOK3E if (shift >= pdshift)
hugepd_free(tlb, hugepte); hugepd_free(tlb, hugepte);
#else else
pgtable_free_tlb(tlb, hugepte, pdshift - shift); pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
} }
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
...@@ -492,6 +465,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ...@@ -492,6 +465,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
start = addr; start = addr;
do { do {
unsigned long more;
pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
...@@ -502,15 +477,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ...@@ -502,15 +477,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
WARN_ON(!pmd_none_or_clear_bad(pmd)); WARN_ON(!pmd_none_or_clear_bad(pmd));
continue; continue;
} }
#ifdef CONFIG_PPC_FSL_BOOK3E
/* /*
* Increment next by the size of the huge mapping since * Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a * there may be more than one entry at this level for a
* single hugepage, but all of them point to * single hugepage, but all of them point to
* the same kmem cache that holds the hugepte. * the same kmem cache that holds the hugepte.
*/ */
next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif if (more > next)
next = more;
free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
addr, next, floor, ceiling); addr, next, floor, ceiling);
} while (addr = next, addr != end); } while (addr = next, addr != end);
...@@ -550,15 +526,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, ...@@ -550,15 +526,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
hugetlb_free_pmd_range(tlb, pud, addr, next, floor, hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
ceiling); ceiling);
} else { } else {
#ifdef CONFIG_PPC_FSL_BOOK3E unsigned long more;
/* /*
* Increment next by the size of the huge mapping since * Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a * there may be more than one entry at this level for a
* single hugepage, but all of them point to * single hugepage, but all of them point to
* the same kmem cache that holds the hugepte. * the same kmem cache that holds the hugepte.
*/ */
next = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif if (more > next)
next = more;
free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
addr, next, floor, ceiling); addr, next, floor, ceiling);
} }
...@@ -615,15 +593,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, ...@@ -615,15 +593,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
continue; continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} else { } else {
#ifdef CONFIG_PPC_FSL_BOOK3E unsigned long more;
/* /*
* Increment next by the size of the huge mapping since * Increment next by the size of the huge mapping since
* there may be more than one entry at the pgd level * there may be more than one entry at the pgd level
* for a single hugepage, but all of them point to the * for a single hugepage, but all of them point to the
* same kmem cache that holds the hugepte. * same kmem cache that holds the hugepte.
*/ */
next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif if (more > next)
next = more;
free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
addr, next, floor, ceiling); addr, next, floor, ceiling);
} }
...@@ -753,12 +733,13 @@ static int __init add_huge_page_size(unsigned long long size) ...@@ -753,12 +733,13 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and /* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */ * that it fits within pagetable and slice limits. */
if (size <= PAGE_SIZE)
return -EINVAL;
#ifdef CONFIG_PPC_FSL_BOOK3E #ifdef CONFIG_PPC_FSL_BOOK3E
if ((size < PAGE_SIZE) || !is_power_of_4(size)) if (!is_power_of_4(size))
return -EINVAL; return -EINVAL;
#else #else
if (!is_power_of_2(size) if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
|| (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
return -EINVAL; return -EINVAL;
#endif #endif
...@@ -791,53 +772,15 @@ static int __init hugepage_setup_sz(char *str) ...@@ -791,53 +772,15 @@ static int __init hugepage_setup_sz(char *str)
} }
__setup("hugepagesz=", hugepage_setup_sz); __setup("hugepagesz=", hugepage_setup_sz);
#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache; struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void) static int __init hugetlbpage_init(void)
{ {
int psize; int psize;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { #if !defined(CONFIG_PPC_FSL_BOOK3E)
unsigned shift;
if (!mmu_psize_defs[psize].shift)
continue;
shift = mmu_psize_to_shift(psize);
/* Don't treat normal page sizes as huge... */
if (shift != PAGE_SHIFT)
if (add_huge_page_size(1ULL << shift) < 0)
continue;
}
/*
* Create a kmem cache for hugeptes. The bottom bits in the pte have
* size information encoded in them, so align them to allow this
*/
hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
HUGEPD_SHIFT_MASK + 1, 0, NULL);
if (hugepte_cache == NULL)
panic("%s: Unable to create kmem cache for hugeptes\n",
__func__);
/* Default hpage size = 4M */
if (mmu_psize_defs[MMU_PAGE_4M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
else
panic("%s: Unable to set default huge page size\n", __func__);
return 0;
}
#else
static int __init hugetlbpage_init(void)
{
int psize;
if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV; return -ENODEV;
#endif
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift; unsigned shift;
unsigned pdshift; unsigned pdshift;
...@@ -850,9 +793,9 @@ static int __init hugetlbpage_init(void) ...@@ -850,9 +793,9 @@ static int __init hugetlbpage_init(void)
if (add_huge_page_size(1ULL << shift) < 0) if (add_huge_page_size(1ULL << shift) < 0)
continue; continue;
if (shift < PMD_SHIFT) if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT; pdshift = PMD_SHIFT;
else if (shift < PUD_SHIFT) else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT; pdshift = PUD_SHIFT;
else else
pdshift = PGDIR_SHIFT; pdshift = PGDIR_SHIFT;
...@@ -860,14 +803,36 @@ static int __init hugetlbpage_init(void) ...@@ -860,14 +803,36 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't * if we have pdshift and shift value same, we don't
* use pgt cache for hugepd. * use pgt cache for hugepd.
*/ */
if (pdshift != shift) { if (pdshift > shift) {
pgtable_cache_add(pdshift - shift, NULL); pgtable_cache_add(pdshift - shift, NULL);
if (!PGT_CACHE(pdshift - shift)) if (!PGT_CACHE(pdshift - shift))
panic("hugetlbpage_init(): could not create " panic("hugetlbpage_init(): could not create "
"pgtable cache for %d bit pagesize\n", shift); "pgtable cache for %d bit pagesize\n", shift);
} }
#ifdef CONFIG_PPC_FSL_BOOK3E
else if (!hugepte_cache) {
/*
* Create a kmem cache for hugeptes. The bottom bits in
* the pte have size information encoded in them, so
* align them to allow this
*/
hugepte_cache = kmem_cache_create("hugepte-cache",
sizeof(pte_t),
HUGEPD_SHIFT_MASK + 1,
0, NULL);
if (hugepte_cache == NULL)
panic("%s: Unable to create kmem cache "
"for hugeptes\n", __func__);
}
#endif
} }
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Default hpage size = 4M */
if (mmu_psize_defs[MMU_PAGE_4M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
#else
/* Set default large page size. Currently, we pick 16M or 1M /* Set default large page size. Currently, we pick 16M or 1M
* depending on what is available * depending on what is available
*/ */
...@@ -877,11 +842,13 @@ static int __init hugetlbpage_init(void) ...@@ -877,11 +842,13 @@ static int __init hugetlbpage_init(void)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
else if (mmu_psize_defs[MMU_PAGE_2M].shift) else if (mmu_psize_defs[MMU_PAGE_2M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
#endif
else
panic("%s: Unable to set default huge page size\n", __func__);
return 0; return 0;
} }
#endif
arch_initcall(hugetlbpage_init); arch_initcall(hugetlbpage_init);
void flush_dcache_icache_hugepage(struct page *page) void flush_dcache_icache_hugepage(struct page *page)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment