Commit aa729dcc authored by Catalin Marinas's avatar Catalin Marinas

Merge branch 'for-next/hugepages' of...

Merge branch 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux into upstream-hugepages

* 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux:
  ARM64: mm: THP support.
  ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
  ARM64: mm: HugeTLB support.
  ARM64: mm: Move PTE_PROT_NONE bit.
  ARM64: mm: Make PAGE_NONE pages read only and no-execute.
  ARM64: mm: Restore memblock limit when map_mem finished.
  mm: thp: Correct the HPAGE_PMD_ORDER check.
  x86: mm: Remove general hugetlb code from x86.
  mm: hugetlb: Copy general hugetlb code from x86 to mm.
  x86: mm: Remove x86 version of huge_pmd_share.
  mm: hugetlb: Copy huge_pmd_share from x86 to mm.

Conflicts:
	arch/arm64/Kconfig
	arch/arm64/include/asm/pgtable-hwdef.h
	arch/arm64/include/asm/pgtable.h
parents ee877b53 af074848
......@@ -188,6 +188,18 @@ config HW_PERF_EVENTS
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
config SYS_SUPPORTS_HUGETLBFS
def_bool y
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
def_bool y if !ARM64_64K_PAGES
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
source "mm/Kconfig"
config XEN_DOM0
......@@ -200,6 +212,11 @@ config XEN
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
config FORCE_MAX_ZONEORDER
int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
endmenu
menu "Boot options"
......
/*
* arch/arm64/include/asm/hugetlb.h
*
* Copyright (C) 2013 Linaro Ltd.
*
* Based on arch/x86/include/asm/hugetlb.h
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
#include <asm-generic/hugetlb.h>
#include <asm/page.h>
static inline pte_t huge_ptep_get(pte_t *ptep)
{
return *ptep;
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
set_pte_at(mm, addr, ptep, pte);
}
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
ptep_clear_flush(vma, addr, ptep);
}
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
ptep_set_wrprotect(mm, addr, ptep);
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
return ptep_get_and_clear(mm, addr, ptep);
}
static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
}
static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor,
unsigned long ceiling)
{
free_pgd_range(tlb, addr, end, floor, ceiling);
}
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
return 0;
}
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
struct hstate *h = hstate_file(file);
if (len & ~huge_page_mask(h))
return -EINVAL;
if (addr & ~huge_page_mask(h))
return -EINVAL;
return 0;
}
static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
}
static inline int huge_pte_none(pte_t pte)
{
return pte_none(pte);
}
static inline pte_t huge_pte_wrprotect(pte_t pte)
{
return pte_wrprotect(pte);
}
static inline int arch_prepare_hugepage(struct page *page)
{
return 0;
}
static inline void arch_release_hugepage(struct page *page)
{
}
static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_dcache_clean, &page->flags);
}
#endif /* __ASM_HUGETLB_H */
......@@ -25,17 +25,27 @@
/*
* Hardware page table definitions.
*
* Level 1 descriptor (PUD).
*/
#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
/*
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
/*
* Section
*/
#define PMD_SECT_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
......@@ -54,6 +64,7 @@
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
......
......@@ -25,8 +25,8 @@
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */
#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */
#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
......@@ -66,7 +66,7 @@ extern pgprot_t pgprot_default;
#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
......@@ -82,7 +82,7 @@ extern pgprot_t pgprot_default;
#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
......@@ -179,11 +179,75 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
/*
* Huge pte definitions.
*/
#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
/*
* Hugetlb definitions.
*/
#define HUGE_MAX_HSTATE 2
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define __HAVE_ARCH_PTE_SPECIAL
/*
* Software PMD bits for THP
*/
#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57)
/*
* THP definitions.
*/
#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
#endif
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
PMD_SECT_VALID;
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
return pmd;
}
#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd)
static inline int has_transparent_hugepage(void)
{
return 1;
}
/*
* Mark the prot value as uncacheable and unbufferable.
*/
......@@ -293,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
* bit 2: PTE_FILE
* bits 3-8: swap type
* bits 0, 2: present (must both be zero)
* bit 3: PTE_FILE
* bits 4-8: swap type
* bits 9-63: swap offset
*/
#define __SWP_TYPE_SHIFT 3
#define __SWP_TYPE_SHIFT 4
#define __SWP_TYPE_BITS 6
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
......@@ -318,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a file entry:
* bits 0-1: present (must be zero)
* bit 2: PTE_FILE
* bits 3-63: file offset / PAGE_SIZE
* bits 0, 2: present (must both be zero)
* bit 3: PTE_FILE
* bits 4-63: file offset / PAGE_SIZE
*/
#define pte_file(pte) (pte_val(pte) & PTE_FILE)
#define pte_to_pgoff(x) (pte_val(x) >> 3)
#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE)
#define pte_to_pgoff(x) (pte_val(x) >> 4)
#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE)
#define PTE_FILE_MAX_BITS 61
#define PTE_FILE_MAX_BITS 60
extern int kern_addr_valid(unsigned long addr);
......
......@@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
#define tlb_migrate_finish(mm) do { } while (0)
static inline void
tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
tlb_add_flush(tlb, addr);
}
#endif
......@@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
dsb();
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#endif
#endif
......@@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o tlb.o proc.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -364,17 +364,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
return 0;
}
/*
* Some section permission faults need to be handled gracefully. They can
* happen due to a __{get,put}_user during an oops.
*/
static int do_sect_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
do_bad_area(addr, esr, regs);
return 0;
}
/*
* This abort handler always returns "fault".
*/
......@@ -398,12 +387,12 @@ static struct fault_info {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "reserved access flag fault" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGBUS, 0, "reserved permission fault" },
{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "asynchronous external abort" },
......
/*
* arch/arm64/mm/hugetlbpage.c
*
* Copyright (C) 2013 Linaro Ltd.
*
* Based on arch/x86/mm/hugetlbpage.c.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
#endif
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write)
{
return ERR_PTR(-EINVAL);
}
int pmd_huge(pmd_t pmd)
{
return !(pmd_val(pmd) & PMD_TABLE_BIT);
}
int pud_huge(pud_t pud)
{
return !(pud_val(pud) & PUD_TABLE_BIT);
}
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
if (ps == PMD_SIZE) {
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
return 0;
}
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
......@@ -297,6 +297,16 @@ static void __init map_mem(void)
{
struct memblock_region *reg;
/*
* Temporarily limit the memblock range. We need to do this as
* create_mapping requires puds, pmds and ptes to be allocated from
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir,
* gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned).
*/
memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
/* map all the memory banks */
for_each_memblock(memory, reg) {
phys_addr_t start = reg->base;
......@@ -307,6 +317,9 @@ static void __init map_mem(void)
create_mapping(start, __phys_to_virt(start), end - start);
}
/* Limit no longer required. */
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
/*
......@@ -317,12 +330,6 @@ void __init paging_init(void)
{
void *zero_page;
/*
* Maximum PGDIR_SIZE addressable via the initial direct kernel
* mapping in swapper_pg_dir.
*/
memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
init_mem_pgprot();
map_mem();
......
......@@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
def_bool y
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ZONE_DMA32
bool
default X86_64
......
......@@ -16,169 +16,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
unsigned long addr, pgoff_t idx)
{
unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
svma->vm_start;
unsigned long sbase = saddr & PUD_MASK;
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
/*
* match the virtual addresses, permission and the alignment of the
* page table page.
*/
if (pmd_index(addr) != pmd_index(saddr) ||
vm_flags != svm_flags ||
sbase < svma->vm_start || svma->vm_end < s_end)
return 0;
return saddr;
}
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
/*
* check on proper vm_flags and page table alignment
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
return 1;
return 0;
}
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
* pud has to be populated inside the same i_mmap_mutex section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/
static pte_t *
huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
pte_t *pte;
if (!vma_shareable(vma, addr))
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
get_page(virt_to_page(spte));
break;
}
}
}
if (!spte)
goto out;
spin_lock(&mm->page_table_lock);
if (pud_none(*pud))
pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
else
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
return pte;
}
/*
* unmap huge page backed by shared pte.
*
* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
* indicated by page_count > 1, unmap is achieved by clearing pud and
* decrementing the ref count. If count == 1, the pte page is not shared.
*
* called with vma->vm_mm->page_table_lock held.
*
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
pud_t *pud = pud_offset(pgd, *addr);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
pud_clear(pud);
put_page(virt_to_page(ptep));
*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
return 1;
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
if (pud_none(*pud))
pte = huge_pmd_share(mm, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
pud = pud_offset(pgd, addr);
if (pud_present(*pud)) {
if (pud_large(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
}
}
return (pte_t *) pmd;
}
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
......@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
return !!(pud_val(pud) & _PAGE_PSE);
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pmd);
if (page)
page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
return page;
}
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pud);
if (page)
page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
return page;
}
#endif
/* x86_64 also uses this file */
......
......@@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
} while (0)
extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER
#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
extern int hugepage_madvise(struct vm_area_struct *vma,
......
......@@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
int dequeue_hwpoisoned_huge_page(struct page *page);
void copy_huge_page(struct page *dst, struct page *src);
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
#endif
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
......
......@@ -2931,15 +2931,6 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
/* Can be overriden by architectures */
__attribute__((weak)) struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
BUG();
return NULL;
}
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, unsigned long *nr_pages,
......@@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_acct_memory(h, -(chg - freed));
}
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
unsigned long addr, pgoff_t idx)
{
unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
svma->vm_start;
unsigned long sbase = saddr & PUD_MASK;
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
/*
* match the virtual addresses, permission and the alignment of the
* page table page.
*/
if (pmd_index(addr) != pmd_index(saddr) ||
vm_flags != svm_flags ||
sbase < svma->vm_start || svma->vm_end < s_end)
return 0;
return saddr;
}
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
/*
* check on proper vm_flags and page table alignment
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
return 1;
return 0;
}
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
* pud has to be populated inside the same i_mmap_mutex section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
pte_t *pte;
if (!vma_shareable(vma, addr))
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
get_page(virt_to_page(spte));
break;
}
}
}
if (!spte)
goto out;
spin_lock(&mm->page_table_lock);
if (pud_none(*pud))
pud_populate(mm, pud,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
else
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
return pte;
}
/*
* unmap huge page backed by shared pte.
*
* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
* indicated by page_count > 1, unmap is achieved by clearing pud and
* decrementing the ref count. If count == 1, the pte page is not shared.
*
* called with vma->vm_mm->page_table_lock held.
*
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
pud_t *pud = pud_offset(pgd, *addr);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
pud_clear(pud);
put_page(virt_to_page(ptep));
*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
return 1;
}
#define want_pmd_share() (1)
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
return NULL;
}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
if (want_pmd_share() && pud_none(*pud))
pte = huge_pmd_share(mm, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
pud = pud_offset(pgd, addr);
if (pud_present(*pud)) {
if (pud_huge(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
}
}
return (pte_t *) pmd;
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pmd);
if (page)
page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
return page;
}
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pud);
if (page)
page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
return page;
}
#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
/* Can be overriden by architectures */
__attribute__((weak)) struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
BUG();
return NULL;
}
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment