Commit 31382a8d authored by Andi Kleen, committed by Linus Torvalds

[PATCH] convert Linux to 4-level page tables

Extend the Linux MM to 4-level page tables.

This is the core patch for mm/*, fs/*, include/linux/*

It breaks all architectures, which will be fixed in separate patches.

The conversion is quite straightforward.  All the functions walking the page
table hierarchy have been changed to deal with another level at the top.  The
additional level is called pml4.

mm/memory.c has changed the most, since it does the bulk of the page-table
walking.  Most of the changes there are extensions of the previous code.
Signed-off-by: Andi Kleen <ak@suse.de>

Converted by Nick Piggin to use the pud_t 'page upper' level between pgd
and pmd instead of Andi's pml4 level above pgd. 
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 5787c698
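
For orientation before the diffs: after this patch a full lookup walks pgd, then pud, then pmd, then pte. Below is a minimal sketch of that walk, modeled on get_one_pte_map() in the mm/mremap.c hunk further down. The function name four_level_walk is ours, not the kernel's, and as in the real callers mm->page_table_lock must be held:

	/* Sketch only: the canonical 4-level walk this patch establishes.
	 * Returns the mapped pte, or NULL if any level is absent;
	 * the caller pte_unmap()s the result when done. */
	static pte_t *four_level_walk(struct mm_struct *mm, unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);	/* top level */
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			return NULL;
		pud = pud_offset(pgd, addr);		/* new "page upper" level */
		if (pud_none(*pud))
			return NULL;
		pmd = pmd_offset(pud, addr);		/* page middle level */
		if (!pmd_present(*pmd))
			return NULL;
		return pte_offset_map(pmd, addr);	/* leaf level */
	}

On architectures with fewer hardware levels the extra level is folded away at compile time, so the same source compiles down to the old two- or three-level walk.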
@@ -124,7 +124,8 @@ static inline unsigned long
 drm_follow_page (void *vaddr)
 {
 	pgd_t *pgd = pgd_offset_k((unsigned long) vaddr);
-	pmd_t *pmd = pmd_offset(pgd, (unsigned long) vaddr);
+	pud_t *pud = pud_offset(pgd, (unsigned long) vaddr);
+	pmd_t *pmd = pmd_offset(pud, (unsigned long) vaddr);
 	pte_t *ptep = pte_offset_kernel(pmd, (unsigned long) vaddr);
 	return pte_pfn(*ptep) << PAGE_SHIFT;
 }
...
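
To make the extra level concrete: on x86-64, the architecture this patch was primarily written for, a 48-bit virtual address decodes as four 9-bit table indices plus a 12-bit page offset. A small standalone illustration (the shift values are x86-64's; other 64-bit architectures split differently):

	#include <stdio.h>

	/* Decode a 48-bit x86-64 virtual address into its four table
	 * indices and page offset, mirroring pgd/pud/pmd/pte selection. */
	int main(void)
	{
		unsigned long addr = 0x00007f1234567890UL;

		printf("pgd index: %lu\n", (addr >> 39) & 0x1ff);	/* PGDIR_SHIFT = 39 */
		printf("pud index: %lu\n", (addr >> 30) & 0x1ff);	/* PUD_SHIFT   = 30 */
		printf("pmd index: %lu\n", (addr >> 21) & 0x1ff);	/* PMD_SHIFT   = 21 */
		printf("pte index: %lu\n", (addr >> 12) & 0x1ff);	/* PAGE_SHIFT  = 12 */
		printf("offset   : 0x%lx\n", addr & 0xfff);
		return 0;
	}

Each index selects one of 512 eight-byte entries in a 4 KB table, which is where the 9-bit width comes from.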
@@ -300,6 +300,7 @@ void install_arg_page(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t * pgd;
+	pud_t * pud;
 	pmd_t * pmd;
 	pte_t * pte;
...
@@ -310,7 +311,10 @@ void install_arg_page(struct vm_area_struct *vma,
 	pgd = pgd_offset(mm, address);
 	spin_lock(&mm->page_table_lock);
-	pmd = pmd_alloc(mm, pgd, address);
+	pud = pud_alloc(mm, pgd, address);
+	if (!pud)
+		goto out;
+	pmd = pmd_alloc(mm, pud, address);
 	if (!pmd)
 		goto out;
 	pte = pte_alloc_map(mm, pmd, address);
...
@@ -581,7 +581,8 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 }

 extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
+extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
+extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
...
@@ -626,15 +627,22 @@ extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);

 /*
- * On a two-level page table, this ends up being trivial. Thus the
- * inlining and the symmetry break with pte_alloc_map() that does all
+ * On a two-level or three-level page table, this ends up being trivial. Thus
+ * the inlining and the symmetry break with pte_alloc_map() that does all
  * of this out-of-line.
  */
-static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	if (pgd_none(*pgd))
-		return __pmd_alloc(mm, pgd, address);
-	return pmd_offset(pgd, address);
+		return __pud_alloc(mm, pgd, address);
+	return pud_offset(pgd, address);
+}
+
+static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+	if (pud_none(*pud))
+		return __pmd_alloc(mm, pud, address);
+	return pmd_offset(pud, address);
 }

 extern void free_area_init(unsigned long * zones_size);
...
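
The allocation side mirrors the walk: pud_alloc() and pmd_alloc() above inline to a plain offset when the entry already exists, falling back to __pud_alloc()/__pmd_alloc() otherwise, while pte_alloc_map() handles the leaf out-of-line. A condensed sketch of the caller pattern this patch repeats in install_arg_page(), install_page() and alloc_one_pte_map(); the helper name alloc_walk is hypothetical, and mm->page_table_lock is held as in the real callers:

	/* Sketch: allocate any missing intermediate levels, then map the pte.
	 * Returns NULL on allocation failure; caller pte_unmap()s on success. */
	static pte_t *alloc_walk(struct mm_struct *mm, unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);
		pud_t *pud = pud_alloc(mm, pgd, addr);	/* may call __pud_alloc() */
		pmd_t *pmd;

		if (!pud)
			return NULL;
		pmd = pmd_alloc(mm, pud, addr);		/* may call __pmd_alloc() */
		if (!pmd)
			return NULL;
		return pte_alloc_map(mm, pmd, addr);	/* leaf allocation + map */
	}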
@@ -60,14 +60,19 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgoff_t size;
 	int err = -ENOMEM;
 	pte_t *pte;
-	pgd_t *pgd;
 	pmd_t *pmd;
+	pud_t *pud;
+	pgd_t *pgd;
 	pte_t pte_val;

 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);
-	pmd = pmd_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		goto err_unlock;
+	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		goto err_unlock;
...
@@ -112,14 +117,19 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	int err = -ENOMEM;
 	pte_t *pte;
-	pgd_t *pgd;
 	pmd_t *pmd;
+	pud_t *pud;
+	pgd_t *pgd;
 	pte_t pte_val;

 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);
-	pmd = pmd_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		goto err_unlock;
+	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		goto err_unlock;
...
... (one large diff collapsed in this view) ...
@@ -234,18 +234,29 @@ static struct mempolicy *mpol_new(int mode, unsigned long *nodes)

 /* Ensure all existing pages follow the policy. */
 static int
-verify_pages(unsigned long addr, unsigned long end, unsigned long *nodes)
+verify_pages(struct mm_struct *mm,
+	     unsigned long addr, unsigned long end, unsigned long *nodes)
 {
 	while (addr < end) {
 		struct page *p;
 		pte_t *pte;
 		pmd_t *pmd;
-		pgd_t *pgd = pgd_offset_k(addr);
+		pud_t *pud;
+		pgd_t *pgd;
+
+		pgd = pgd_offset(mm, addr);
 		if (pgd_none(*pgd)) {
-			addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			unsigned long next = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			if (next > addr)
+				break;
+			addr = next;
+			continue;
+		}
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			addr = (addr + PUD_SIZE) & PUD_MASK;
 			continue;
 		}
-		pmd = pmd_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			addr = (addr + PMD_SIZE) & PMD_MASK;
 			continue;
...
@@ -283,7 +294,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		if (prev && prev->vm_end < vma->vm_start)
 			return ERR_PTR(-EFAULT);
 		if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
-			err = verify_pages(vma->vm_start, vma->vm_end, nodes);
+			err = verify_pages(vma->vm_mm,
+					   vma->vm_start, vma->vm_end, nodes);
 			if (err) {
 				first = ERR_PTR(err);
 				break;
...
@@ -62,12 +62,38 @@ change_pte_range(pmd_t *pmd, unsigned long address,
 }

 static inline void
-change_pmd_range(pgd_t *pgd, unsigned long address,
+change_pmd_range(pud_t *pud, unsigned long address,
 		unsigned long size, pgprot_t newprot)
 {
 	pmd_t * pmd;
 	unsigned long end;

+	if (pud_none(*pud))
+		return;
+	if (pud_bad(*pud)) {
+		pud_ERROR(*pud);
+		pud_clear(pud);
+		return;
+	}
+	pmd = pmd_offset(pud, address);
+	address &= ~PUD_MASK;
+	end = address + size;
+	if (end > PUD_SIZE)
+		end = PUD_SIZE;
+	do {
+		change_pte_range(pmd, address, end - address, newprot);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+}
+
+static inline void
+change_pud_range(pgd_t *pgd, unsigned long address,
+		unsigned long size, pgprot_t newprot)
+{
+	pud_t * pud;
+	unsigned long end;
+
 	if (pgd_none(*pgd))
 		return;
 	if (pgd_bad(*pgd)) {
...
@@ -75,15 +101,15 @@ change_pmd_range(pgd_t *pgd, unsigned long address,
 		pgd_clear(pgd);
 		return;
 	}
-	pmd = pmd_offset(pgd, address);
+	pud = pud_offset(pgd, address);
 	address &= ~PGDIR_MASK;
 	end = address + size;
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		change_pte_range(pmd, address, end - address, newprot);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
+		change_pmd_range(pud, address, end - address, newprot);
+		address = (address + PUD_SIZE) & PUD_MASK;
+		pud++;
 	} while (address && (address < end));
 }
...
@@ -91,22 +117,25 @@ static void
 change_protection(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, pgprot_t newprot)
 {
-	pgd_t *dir;
-	unsigned long beg = start;
+	struct mm_struct *mm = current->mm;
+	pgd_t *pgd;
+	unsigned long beg = start, next;
+	int i;

-	dir = pgd_offset(current->mm, start);
+	pgd = pgd_offset(mm, start);
 	flush_cache_range(vma, beg, end);
-	if (start >= end)
-		BUG();
-	spin_lock(&current->mm->page_table_lock);
-	do {
-		change_pmd_range(dir, start, end - start, newprot);
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (start && (start < end));
+	BUG_ON(start >= end);
+	spin_lock(&mm->page_table_lock);
+	for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
+		next = (start + PGDIR_SIZE) & PGDIR_MASK;
+		if (next <= start || next > end)
+			next = end;
+		change_pud_range(pgd, start, next - start, newprot);
+		start = next;
+		pgd++;
+	}
 	flush_tlb_range(vma, beg, end);
-	spin_unlock(&current->mm->page_table_lock);
+	spin_unlock(&mm->page_table_lock);
+	return;
 }
...
@@ -25,19 +25,24 @@
 static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;

 	pgd = pgd_offset(mm, addr);
 	if (pgd_none(*pgd))
 		goto end;
-	if (pgd_bad(*pgd)) {
-		pgd_ERROR(*pgd);
-		pgd_clear(pgd);
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		goto end;
+	if (pud_bad(*pud)) {
+		pud_ERROR(*pud);
+		pud_clear(pud);
 		goto end;
 	}

-	pmd = pmd_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		goto end;
 	if (pmd_bad(*pmd)) {
...
@@ -58,12 +63,17 @@ static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
 static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;

 	pgd = pgd_offset(mm, addr);
 	if (pgd_none(*pgd))
 		return NULL;
-	pmd = pmd_offset(pgd, addr);
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
 		return NULL;
 	return pte_offset_map(pmd, addr);
...
@@ -71,10 +81,17 @@ static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
 static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;

-	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return NULL;
+	pmd = pmd_alloc(mm, pud, addr);
 	if (pmd)
 		pte = pte_alloc_map(mm, pmd, addr);
 	return pte;
...
@@ -67,13 +67,39 @@ static int filemap_sync_pte_range(pmd_t * pmd,
 	return error;
 }

-static inline int filemap_sync_pmd_range(pgd_t * pgd,
+static inline int filemap_sync_pmd_range(pud_t * pud,
 	unsigned long address, unsigned long end,
 	struct vm_area_struct *vma, unsigned int flags)
 {
 	pmd_t * pmd;
 	int error;

+	if (pud_none(*pud))
+		return 0;
+	if (pud_bad(*pud)) {
+		pud_ERROR(*pud);
+		pud_clear(pud);
+		return 0;
+	}
+	pmd = pmd_offset(pud, address);
+	if ((address & PUD_MASK) != (end & PUD_MASK))
+		end = (address & PUD_MASK) + PUD_SIZE;
+	error = 0;
+	do {
+		error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return error;
+}
+
+static inline int filemap_sync_pud_range(pgd_t *pgd,
+	unsigned long address, unsigned long end,
+	struct vm_area_struct *vma, unsigned int flags)
+{
+	pud_t *pud;
+	int error;
+
 	if (pgd_none(*pgd))
 		return 0;
 	if (pgd_bad(*pgd)) {
...
@@ -81,14 +107,14 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
 		pgd_clear(pgd);
 		return 0;
 	}
-	pmd = pmd_offset(pgd, address);
+	pud = pud_offset(pgd, address);
 	if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
 		end = (address & PGDIR_MASK) + PGDIR_SIZE;
 	error = 0;
 	do {
-		error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
+		error |= filemap_sync_pmd_range(pud, address, end, vma, flags);
+		address = (address + PUD_SIZE) & PUD_MASK;
+		pud++;
 	} while (address && (address < end));
 	return error;
 }
...
@@ -96,8 +122,10 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 	size_t size, unsigned int flags)
 {
-	pgd_t * dir;
+	pgd_t *pgd;
 	unsigned long end = address + size;
+	unsigned long next;
+	int i;
 	int error = 0;

 	/* Aquire the lock early; it may be possible to avoid dropping
...
@@ -105,7 +133,7 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 	 */
 	spin_lock(&vma->vm_mm->page_table_lock);

-	dir = pgd_offset(vma->vm_mm, address);
+	pgd = pgd_offset(vma->vm_mm, address);
 	flush_cache_range(vma, address, end);

 	/* For hugepages we can't go walking the page table normally,
...
@@ -116,11 +144,14 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 	if (address >= end)
 		BUG();

-	do {
-		error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
+	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
+		next = (address + PGDIR_SIZE) & PGDIR_MASK;
+		if (next <= address || next > end)
+			next = end;
+		error |= filemap_sync_pud_range(pgd, address, next, vma, flags);
+		address = next;
+		pgd++;
+	}

 	/*
 	 * Why flush ? filemap_sync_pte already flushed the tlbs with the
 	 * dirty bits.
...
@@ -259,6 +259,7 @@ static int page_referenced_one(struct page *page,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	int referenced = 0;
...
@@ -275,7 +276,11 @@ static int page_referenced_one(struct page *page,
 	if (!pgd_present(*pgd))
 		goto out_unlock;

-	pmd = pmd_offset(pgd, address);
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto out_unlock;
+
+	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		goto out_unlock;
...
@@ -505,6 +510,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t pteval;
...
@@ -526,7 +532,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	if (!pgd_present(*pgd))
 		goto out_unlock;

-	pmd = pmd_offset(pgd, address);
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto out_unlock;
+
+	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		goto out_unlock;
...
@@ -634,6 +644,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t pteval;
...
@@ -659,7 +670,11 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	if (!pgd_present(*pgd))
 		goto out_unlock;

-	pmd = pmd_offset(pgd, address);
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto out_unlock;
+
+	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		goto out_unlock;
...
@@ -486,27 +486,27 @@ static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 }

 /* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
-	unsigned long address, unsigned long size,
+static unsigned long unuse_pud(struct vm_area_struct * vma, pud_t *pud,
+	unsigned long address, unsigned long size, unsigned long offset,
 	swp_entry_t entry, struct page *page)
 {
 	pmd_t * pmd;
-	unsigned long offset, end;
+	unsigned long end;
 	unsigned long foundaddr;

-	if (pgd_none(*dir))
+	if (pud_none(*pud))
 		return 0;
-	if (pgd_bad(*dir)) {
-		pgd_ERROR(*dir);
-		pgd_clear(dir);
+	if (pud_bad(*pud)) {
+		pud_ERROR(*pud);
+		pud_clear(pud);
 		return 0;
 	}
-	pmd = pmd_offset(dir, address);
-	offset = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
+	pmd = pmd_offset(pud, address);
+	offset += address & PUD_MASK;
+	address &= ~PUD_MASK;
 	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
+	if (end > PUD_SIZE)
+		end = PUD_SIZE;
 	if (address >= end)
 		BUG();
 	do {
...
@@ -520,13 +520,49 @@ static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
 	return 0;
 }

+/* vma->vm_mm->page_table_lock is held */
+static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *pgd,
+	unsigned long address, unsigned long size,
+	swp_entry_t entry, struct page *page)
+{
+	pud_t * pud;
+	unsigned long offset;
+	unsigned long foundaddr;
+	unsigned long end;
+
+	if (pgd_none(*pgd))
+		return 0;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		return 0;
+	}
+	pud = pud_offset(pgd, address);
+	offset = address & PGDIR_MASK;
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	BUG_ON (address >= end);
+	do {
+		foundaddr = unuse_pud(vma, pud, address, end - address,
+						offset, entry, page);
+		if (foundaddr)
+			return foundaddr;
+		address = (address + PUD_SIZE) & PUD_MASK;
+		pud++;
+	} while (address && (address < end));
+	return 0;
+}
+
 /* vma->vm_mm->page_table_lock is held */
 static unsigned long unuse_vma(struct vm_area_struct * vma,
 	swp_entry_t entry, struct page *page)
 {
-	pgd_t *pgdir;
-	unsigned long start, end;
+	pgd_t *pgd;
+	unsigned long start, end, next;
 	unsigned long foundaddr;
+	int i;

 	if (page->mapping) {
 		start = page_address_in_vma(page, vma);
...
@@ -538,15 +574,18 @@ static unsigned long unuse_vma(struct vm_area_struct * vma,
 		start = vma->vm_start;
 		end = vma->vm_end;
 	}
-	pgdir = pgd_offset(vma->vm_mm, start);
-	do {
-		foundaddr = unuse_pgd(vma, pgdir, start, end - start,
-						entry, page);
+	pgd = pgd_offset(vma->vm_mm, start);
+	for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
+		next = (start + PGDIR_SIZE) & PGDIR_MASK;
+		if (next > end || next <= start)
+			next = end;
+		foundaddr = unuse_pgd(vma, pgd, start, next - start, entry, page);
 		if (foundaddr)
 			return foundaddr;
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		pgdir++;
-	} while (start && (start < end));
+		start = next;
+		i++;
+		pgd++;
+	}
 	return 0;
 }
...
@@ -56,25 +56,25 @@ static void unmap_area_pte(pmd_t *pmd, unsigned long address,
 	} while (address < end);
 }

-static void unmap_area_pmd(pgd_t *dir, unsigned long address,
+static void unmap_area_pmd(pud_t *pud, unsigned long address,
 				unsigned long size)
 {
 	unsigned long end;
 	pmd_t *pmd;

-	if (pgd_none(*dir))
+	if (pud_none(*pud))
 		return;
-	if (pgd_bad(*dir)) {
-		pgd_ERROR(*dir);
-		pgd_clear(dir);
+	if (pud_bad(*pud)) {
+		pud_ERROR(*pud);
+		pud_clear(pud);
 		return;
 	}

-	pmd = pmd_offset(dir, address);
-	address &= ~PGDIR_MASK;
+	pmd = pmd_offset(pud, address);
+	address &= ~PUD_MASK;
 	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
+	if (end > PUD_SIZE)
+		end = PUD_SIZE;

 	do {
 		unmap_area_pte(pmd, address, end - address);
...
@@ -83,6 +83,33 @@ static void unmap_area_pmd(pgd_t *dir, unsigned long address,
 	} while (address < end);
 }

+static void unmap_area_pud(pgd_t *pgd, unsigned long address,
+				unsigned long size)
+{
+	pud_t *pud;
+	unsigned long end;
+
+	if (pgd_none(*pgd))
+		return;
+	if (pgd_bad(*pgd)) {
+		pgd_ERROR(*pgd);
+		pgd_clear(pgd);
+		return;
+	}
+
+	pud = pud_offset(pgd, address);
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+
+	do {
+		unmap_area_pmd(pud, address, end - address);
+		address = (address + PUD_SIZE) & PUD_MASK;
+		pud++;
+	} while (address && (address < end));
+}
+
 static int map_area_pte(pte_t *pte, unsigned long address,
 			unsigned long size, pgprot_t prot,
 			struct page ***pages)
...
@@ -96,7 +123,6 @@ static int map_area_pte(pte_t *pte, unsigned long address,
 	do {
 		struct page *page = **pages;
 		WARN_ON(!pte_none(*pte));
-
 		if (!page)
 			return -ENOMEM;
...
@@ -115,11 +141,11 @@ static int map_area_pmd(pmd_t *pmd, unsigned long address,
 {
 	unsigned long base, end;

-	base = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
+	base = address & PUD_MASK;
+	address &= ~PUD_MASK;
 	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
+	if (end > PUD_SIZE)
+		end = PUD_SIZE;

 	do {
 		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
...
@@ -134,19 +160,41 @@ static int map_area_pmd(pmd_t *pmd, unsigned long address,
 	return 0;
 }

+static int map_area_pud(pud_t *pud, unsigned long address,
+			unsigned long end, pgprot_t prot,
+			struct page ***pages)
+{
+	do {
+		pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
+		if (!pmd)
+			return -ENOMEM;
+		if (map_area_pmd(pmd, address, end - address, prot, pages))
+			return -ENOMEM;
+		address = (address + PUD_SIZE) & PUD_MASK;
+		pud++;
+	} while (address && address < end);
+
+	return 0;
+}
+
 void unmap_vm_area(struct vm_struct *area)
 {
 	unsigned long address = (unsigned long) area->addr;
 	unsigned long end = (address + area->size);
-	pgd_t *dir;
+	unsigned long next;
+	pgd_t *pgd;
+	int i;

-	dir = pgd_offset_k(address);
+	pgd = pgd_offset_k(address);
 	flush_cache_vunmap(address, end);
-	do {
-		unmap_area_pmd(dir, address, end - address);
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
+	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
+		next = (address + PGDIR_SIZE) & PGDIR_MASK;
+		if (next <= address || next > end)
+			next = end;
+		unmap_area_pud(pgd, address, next - address);
+		address = next;
+		pgd++;
+	}
 	flush_tlb_kernel_range((unsigned long) area->addr, end);
 }
...
@@ -154,25 +202,30 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 {
 	unsigned long address = (unsigned long) area->addr;
 	unsigned long end = address + (area->size-PAGE_SIZE);
-	pgd_t *dir;
+	unsigned long next;
+	pgd_t *pgd;
 	int err = 0;
+	int i;

-	dir = pgd_offset_k(address);
+	pgd = pgd_offset_k(address);
 	spin_lock(&init_mm.page_table_lock);
-	do {
-		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
-		if (!pmd) {
+	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
+		pud_t *pud = pud_alloc(&init_mm, pgd, address);
+		if (!pud) {
 			err = -ENOMEM;
 			break;
 		}
-		if (map_area_pmd(pmd, address, end - address, prot, pages)) {
+		next = (address + PGDIR_SIZE) & PGDIR_MASK;
+		if (next < address || next > end)
+			next = end;
+		if (map_area_pud(pud, address, next, prot, pages)) {
 			err = -ENOMEM;
 			break;
 		}
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
+		address = next;
+		pgd++;
+	}
 	spin_unlock(&init_mm.page_table_lock);
 	flush_cache_vmap((unsigned long) area->addr, end);
...