Commit aa729dcc authored by Catalin Marinas

Merge branch 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux into upstream-hugepages

* 'for-next/hugepages' of git://git.linaro.org/people/stevecapper/linux:
  ARM64: mm: THP support.
  ARM64: mm: Raise MAX_ORDER for 64KB pages and THP.
  ARM64: mm: HugeTLB support.
  ARM64: mm: Move PTE_PROT_NONE bit.
  ARM64: mm: Make PAGE_NONE pages read only and no-execute.
  ARM64: mm: Restore memblock limit when map_mem finished.
  mm: thp: Correct the HPAGE_PMD_ORDER check.
  x86: mm: Remove general hugetlb code from x86.
  mm: hugetlb: Copy general hugetlb code from x86 to mm.
  x86: mm: Remove x86 version of huge_pmd_share.
  mm: hugetlb: Copy huge_pmd_share from x86 to mm.

Conflicts:
	arch/arm64/Kconfig
	arch/arm64/include/asm/pgtable-hwdef.h
	arch/arm64/include/asm/pgtable.h
parents ee877b53 af074848
@@ -188,6 +188,18 @@ config HW_PERF_EVENTS
	  Enable hardware performance counter support for perf events. If
	  disabled, perf events will use software events only.
config SYS_SUPPORTS_HUGETLBFS
	def_bool y

config ARCH_WANT_GENERAL_HUGETLB
	def_bool y

config ARCH_WANT_HUGE_PMD_SHARE
	def_bool y if !ARM64_64K_PAGES

config HAVE_ARCH_TRANSPARENT_HUGEPAGE
	def_bool y
source "mm/Kconfig" source "mm/Kconfig"
config XEN_DOM0 config XEN_DOM0
...@@ -200,6 +212,11 @@ config XEN ...@@ -200,6 +212,11 @@ config XEN
help help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
config FORCE_MAX_ZONEORDER
	int
	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
	default "11"
endmenu

menu "Boot options"
......
/*
* arch/arm64/include/asm/hugetlb.h
*
* Copyright (C) 2013 Linaro Ltd.
*
* Based on arch/x86/include/asm/hugetlb.h
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
#include <asm-generic/hugetlb.h>
#include <asm/page.h>
static inline pte_t huge_ptep_get(pte_t *ptep)
{
return *ptep;
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
set_pte_at(mm, addr, ptep, pte);
}
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
ptep_clear_flush(vma, addr, ptep);
}
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
ptep_set_wrprotect(mm, addr, ptep);
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
return ptep_get_and_clear(mm, addr, ptep);
}
static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
}
static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor,
unsigned long ceiling)
{
free_pgd_range(tlb, addr, end, floor, ceiling);
}
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
return 0;
}
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
struct hstate *h = hstate_file(file);
if (len & ~huge_page_mask(h))
return -EINVAL;
if (addr & ~huge_page_mask(h))
return -EINVAL;
return 0;
}
static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
}
static inline int huge_pte_none(pte_t pte)
{
return pte_none(pte);
}
static inline pte_t huge_pte_wrprotect(pte_t pte)
{
return pte_wrprotect(pte);
}
static inline int arch_prepare_hugepage(struct page *page)
{
return 0;
}
static inline void arch_release_hugepage(struct page *page)
{
}
static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_dcache_clean, &page->flags);
}
#endif /* __ASM_HUGETLB_H */
@@ -25,17 +25,27 @@
/*
 * Hardware page table definitions.
 *
 * Level 1 descriptor (PUD).
 */
#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)

/*
 * Level 2 descriptor (PMD).
 */
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)

/*
 * Section
 */
-#define PMD_SECT_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
+#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2)
+#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
+#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
@@ -54,6 +64,7 @@
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
......
@@ -25,8 +25,8 @@
 * Software defined PTE bits definition.
 */
#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */
-#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */
+#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */
+#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
@@ -66,7 +66,7 @@ extern pgprot_t pgprot_default;
#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
-#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
+#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -82,7 +82,7 @@ extern pgprot_t pgprot_default;
#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
-#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
+#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -179,11 +179,75 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
/*
 * Huge pte definitions.
 */
-#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
-#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
+#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
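A brief aside on why touching a single bit is enough here (architectural background, not part of the diff): in the ARMv8 descriptor format, bit 0 marks a valid entry and bit 1 selects between a table/page entry and a block (section) entry, so the huge-page helpers only need to test or clear that one bit.

/*
 * Background sketch (assumed ARMv8 encoding, not quoted from the diff):
 *
 *   bits[1:0] == 0b11  ->  table descriptor / level-3 page entry
 *   bits[1:0] == 0b01  ->  block descriptor, i.e. a huge mapping
 *
 * With the valid bit (bit 0) already set, testing or clearing bit 1
 * (PTE_TABLE_BIT / PMD_TABLE_BIT) is what pte_huge(), pte_mkhuge(),
 * pmd_trans_huge() and pmd_mkhuge() below rely on.
 */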
/*
* Hugetlb definitions.
*/
#define HUGE_MAX_HSTATE 2
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
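To put numbers on these definitions (illustrative values assuming the two arm64 granule configurations supported at the time, not quoted from the diff):

/*
 * Assumed example values:
 *
 *   4KB pages  (PAGE_SHIFT 12, PMD_SHIFT 21): HPAGE_SIZE = 2MB,
 *      HUGETLB_PAGE_ORDER = 9; a second hstate at PUD_SIZE (1GB)
 *      is what HUGE_MAX_HSTATE = 2 leaves room for.
 *   64KB pages (PAGE_SHIFT 16, PMD_SHIFT 29): HPAGE_SIZE = 512MB,
 *      HUGETLB_PAGE_ORDER = 13.
 */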
#define __HAVE_ARCH_PTE_SPECIAL
/*
* Software PMD bits for THP
*/
#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57)
/*
* THP definitions.
*/
#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
#endif
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
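For readability, here is what the first of these invocations expands to (mechanical macro expansion, shown only as an illustration):

static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
	/* PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY) */
	pmd_val(pmd) |= PMD_SECT_RDONLY;	/* set AP[2] => read-only */
	return pmd;
}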
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
PMD_SECT_VALID;
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
return pmd;
}
#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd)
static inline int has_transparent_hugepage(void)
{
return 1;
}
/*
 * Mark the prot value as uncacheable and unbufferable.
 */
@@ -293,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
 * Encode and decode a swap entry:
- *	bits 0-1:	present (must be zero)
- *	bit  2:		PTE_FILE
- *	bits 3-8:	swap type
+ *	bits 0, 2:	present (must both be zero)
+ *	bit  3:		PTE_FILE
+ *	bits 4-8:	swap type
 *	bits 9-63:	swap offset
 */
-#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_SHIFT 4
#define __SWP_TYPE_BITS 6
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -318,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
 * Encode and decode a file entry:
- *	bits 0-1:	present (must be zero)
- *	bit  2:		PTE_FILE
- *	bits 3-63:	file offset / PAGE_SIZE
+ *	bits 0, 2:	present (must both be zero)
+ *	bit  3:		PTE_FILE
+ *	bits 4-63:	file offset / PAGE_SIZE
 */
#define pte_file(pte) (pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x) (pte_val(x) >> 3)
-#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE)
-#define PTE_FILE_MAX_BITS 61
+#define pte_to_pgoff(x) (pte_val(x) >> 4)
+#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE)
+#define PTE_FILE_MAX_BITS 60
extern int kern_addr_valid(unsigned long addr);
......
@@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
#define tlb_migrate_finish(mm) do { } while (0)
static inline void
tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
tlb_add_flush(tlb, addr);
}
#endif
@@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
	dsb();
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#endif
#endif
@@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
			   cache.o copypage.o flush.o \
			   ioremap.o mmap.o pgd.o mmu.o \
			   context.o tlb.o proc.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
@@ -364,17 +364,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
	return 0;
}
/*
* Some section permission faults need to be handled gracefully. They can
* happen due to a __{get,put}_user during an oops.
*/
static int do_sect_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
do_bad_area(addr, esr, regs);
return 0;
}
/*
 * This abort handler always returns "fault".
 */
@@ -398,12 +387,12 @@ static struct fault_info {
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
	{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
	{ do_bad, SIGBUS, 0, "reserved access flag fault" },
-	{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
-	{ do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
+	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
+	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
	{ do_bad, SIGBUS, 0, "reserved permission fault" },
-	{ do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
-	{ do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
+	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
+	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
	{ do_bad, SIGBUS, 0, "synchronous external abort" },
	{ do_bad, SIGBUS, 0, "asynchronous external abort" },
......
/*
* arch/arm64/mm/hugetlbpage.c
*
* Copyright (C) 2013 Linaro Ltd.
*
* Based on arch/x86/mm/hugetlbpage.c.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
#endif
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write)
{
return ERR_PTR(-EINVAL);
}
int pmd_huge(pmd_t pmd)
{
return !(pmd_val(pmd) & PMD_TABLE_BIT);
}
int pud_huge(pud_t pud)
{
return !(pud_val(pud) & PUD_TABLE_BIT);
}
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
if (ps == PMD_SIZE) {
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
return 0;
}
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
@@ -297,6 +297,16 @@ static void __init map_mem(void)
{
	struct memblock_region *reg;
/*
* Temporarily limit the memblock range. We need to do this as
* create_mapping requires puds, pmds and ptes to be allocated from
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir,
* gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned).
*/
memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
@@ -307,6 +317,9 @@ static void __init map_mem(void)
		create_mapping(start, __phys_to_virt(start), end - start);
	}
/* Limit no longer required. */
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}

/*
@@ -317,12 +330,6 @@ void __init paging_init(void)
{
	void *zero_page;
/*
* Maximum PGDIR_SIZE addressable via the initial direct kernel
* mapping in swapper_pg_dir.
*/
memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
	init_mem_pgprot();
	map_mem();
......
@@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
	def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
	def_bool y

config ARCH_WANT_GENERAL_HUGETLB
	def_bool y
config ZONE_DMA32
	bool
	default X86_64
......
@@ -16,169 +16,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
unsigned long addr, pgoff_t idx)
{
unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
svma->vm_start;
unsigned long sbase = saddr & PUD_MASK;
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
/*
* match the virtual addresses, permission and the alignment of the
* page table page.
*/
if (pmd_index(addr) != pmd_index(saddr) ||
vm_flags != svm_flags ||
sbase < svma->vm_start || svma->vm_end < s_end)
return 0;
return saddr;
}
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
/*
* check on proper vm_flags and page table alignment
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
return 1;
return 0;
}
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
* pud has to be populated inside the same i_mmap_mutex section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/
static pte_t *
huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
pte_t *pte;
if (!vma_shareable(vma, addr))
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
get_page(virt_to_page(spte));
break;
}
}
}
if (!spte)
goto out;
spin_lock(&mm->page_table_lock);
if (pud_none(*pud))
pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
else
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
return pte;
}
/*
* unmap huge page backed by shared pte.
*
* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
* indicated by page_count > 1, unmap is achieved by clearing pud and
* decrementing the ref count. If count == 1, the pte page is not shared.
*
* called with vma->vm_mm->page_table_lock held.
*
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
pud_t *pud = pud_offset(pgd, *addr);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
pud_clear(pud);
put_page(virt_to_page(ptep));
*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
return 1;
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
if (pud_none(*pud))
pte = huge_pmd_share(mm, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
pud = pud_offset(pgd, addr);
if (pud_present(*pud)) {
if (pud_large(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
}
}
return (pte_t *) pmd;
}
#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
	return !!(pud_val(pud) & _PAGE_PSE);
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pmd);
if (page)
page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
return page;
}
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pud);
if (page)
page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
return page;
}
#endif
/* x86_64 also uses this file */
......
@@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
	} while (0)
extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
				   pmd_t *pmd);
-#if HPAGE_PMD_ORDER > MAX_ORDER
+#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
extern int hugepage_madvise(struct vm_area_struct *vma,
......
@@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
int dequeue_hwpoisoned_huge_page(struct page *page);
void copy_huge_page(struct page *dst, struct page *src);
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
#endif
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
......
@@ -2931,15 +2931,6 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	return ret;
}
/* Can be overriden by architectures */
__attribute__((weak)) struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
BUG();
return NULL;
}
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			 struct page **pages, struct vm_area_struct **vmas,
			 unsigned long *position, unsigned long *nr_pages,
@@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
	hugetlb_acct_memory(h, -(chg - freed));
}
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static unsigned long page_table_shareable(struct vm_area_struct *svma,
struct vm_area_struct *vma,
unsigned long addr, pgoff_t idx)
{
unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
svma->vm_start;
unsigned long sbase = saddr & PUD_MASK;
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
/*
* match the virtual addresses, permission and the alignment of the
* page table page.
*/
if (pmd_index(addr) != pmd_index(saddr) ||
vm_flags != svm_flags ||
sbase < svma->vm_start || svma->vm_end < s_end)
return 0;
return saddr;
}
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
/*
* check on proper vm_flags and page table alignment
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
return 1;
return 0;
}
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
* code much cleaner. pmd allocation is essential for the shared case because
* pud has to be populated inside the same i_mmap_mutex section - otherwise
* racing tasks could either miss the sharing (see huge_pte_offset) or select a
* bad pmd for sharing.
*/
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
pte_t *pte;
if (!vma_shareable(vma, addr))
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
get_page(virt_to_page(spte));
break;
}
}
}
if (!spte)
goto out;
spin_lock(&mm->page_table_lock);
if (pud_none(*pud))
pud_populate(mm, pud,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
else
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
return pte;
}
/*
* unmap huge page backed by shared pte.
*
* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
* indicated by page_count > 1, unmap is achieved by clearing pud and
* decrementing the ref count. If count == 1, the pte page is not shared.
*
* called with vma->vm_mm->page_table_lock held.
*
* returns: 1 successfully unmapped a shared pte page
* 0 the underlying pte page is not shared, or it is the last user
*/
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
pgd_t *pgd = pgd_offset(mm, *addr);
pud_t *pud = pud_offset(pgd, *addr);
BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1)
return 0;
pud_clear(pud);
put_page(virt_to_page(ptep));
*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
return 1;
}
#define want_pmd_share() (1)
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
return NULL;
}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
if (want_pmd_share() && pud_none(*pud))
pte = huge_pmd_share(mm, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) {
pud = pud_offset(pgd, addr);
if (pud_present(*pud)) {
if (pud_huge(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
}
}
return (pte_t *) pmd;
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pmd);
if (page)
page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
return page;
}
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pud);
if (page)
page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
return page;
}
#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
/* Can be overriden by architectures */
__attribute__((weak)) struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
BUG();
return NULL;
}
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
......