Commit 123e4df7 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 9 remove pte_chains

From: Hugh Dickins <hugh@veritas.com>

Lots of deletions: the next patch will put in the new anon rmap, which
should look clearer if first we remove all of the old pte-pointer-based
rmap from the core in this patch - which therefore leaves anonymous rmap
totally disabled, anon pages locked in memory until process frees them.

Leave arch files (and page table rmap) untouched for now, clean them up in
a later batch.  A few constructive changes amidst all the deletions:

Choose names (e.g.  page_add_anon_rmap) and args (e.g.  no more pteps) now
so we need not revisit so many files in the next patch.  Inline function
page_dup_rmap for fork's copy_page_range, which simply bumps mapcount under
lock.  Add cond_resched_lock in copy_page_range.  Struct page rearranged: no pte
union, just mapcount moved next to atomic count, so two ints can occupy one
long on 64-bit; i386 struct page now 32 bytes even with PAE.  Never pass
PageReserved to page_remove_rmap, only do_wp_page did so.


From: Hugh Dickins <hugh@veritas.com>

  Move page_add_anon_rmap's BUG_ON(page_mapping(page)) inside the rmap_lock
  (well, might as well just check mapping if !mapcount then): if this page is
  being mapped or unmapped on another cpu at the same time, page_mapping's
  PageAnon(page) and page->mapping are volatile.

  But page_mapping(page) is used more widely: I've a nasty feeling that
  clear_page_anon, page_add_anon_rmap and/or page_mapping need barriers added
  (also in 2.6.6 itself),
parent b33a7bad
...@@ -293,53 +293,42 @@ EXPORT_SYMBOL(copy_strings_kernel); ...@@ -293,53 +293,42 @@ EXPORT_SYMBOL(copy_strings_kernel);
* This routine is used to map in a page into an address space: needed by * This routine is used to map in a page into an address space: needed by
* execve() for the initial stack and environment pages. * execve() for the initial stack and environment pages.
* *
* tsk->mmap_sem is held for writing. * tsk->mm->mmap_sem is held for writing.
*/ */
void put_dirty_page(struct task_struct *tsk, struct page *page, void put_dirty_page(struct task_struct *tsk, struct page *page,
unsigned long address, pgprot_t prot) unsigned long address, pgprot_t prot)
{ {
struct mm_struct *mm = tsk->mm;
pgd_t * pgd; pgd_t * pgd;
pmd_t * pmd; pmd_t * pmd;
pte_t * pte; pte_t * pte;
struct pte_chain *pte_chain;
pgd = pgd_offset(mm, address);
if (page_count(page) != 1) spin_lock(&mm->page_table_lock);
printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", pmd = pmd_alloc(mm, pgd, address);
page, address);
pgd = pgd_offset(tsk->mm, address);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto out_sig;
spin_lock(&tsk->mm->page_table_lock);
pmd = pmd_alloc(tsk->mm, pgd, address);
if (!pmd) if (!pmd)
goto out; goto out;
pte = pte_alloc_map(tsk->mm, pmd, address); pte = pte_alloc_map(mm, pmd, address);
if (!pte) if (!pte)
goto out; goto out;
if (!pte_none(*pte)) { if (!pte_none(*pte)) {
pte_unmap(pte); pte_unmap(pte);
goto out; goto out;
} }
mm->rss++;
lru_cache_add_active(page); lru_cache_add_active(page);
flush_dcache_page(page); flush_dcache_page(page);
set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
pte_chain = page_add_rmap(page, pte, pte_chain); page_add_anon_rmap(page, mm, address);
pte_unmap(pte); pte_unmap(pte);
tsk->mm->rss++; spin_unlock(&mm->page_table_lock);
spin_unlock(&tsk->mm->page_table_lock);
/* no need for flush_tlb */ /* no need for flush_tlb */
pte_chain_free(pte_chain);
return; return;
out: out:
spin_unlock(&tsk->mm->page_table_lock); spin_unlock(&mm->page_table_lock);
out_sig:
__free_page(page); __free_page(page);
force_sig(SIGKILL, tsk); force_sig(SIGKILL, tsk);
pte_chain_free(pte_chain);
return;
} }
int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
......
...@@ -147,8 +147,6 @@ struct vm_operations_struct { ...@@ -147,8 +147,6 @@ struct vm_operations_struct {
int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
}; };
/* forward declaration; pte_chain is meant to be internal to rmap.c */
struct pte_chain;
struct mmu_gather; struct mmu_gather;
struct inode; struct inode;
...@@ -170,28 +168,26 @@ typedef unsigned long page_flags_t; ...@@ -170,28 +168,26 @@ typedef unsigned long page_flags_t;
* *
* The first line is data used in page cache lookup, the second line * The first line is data used in page cache lookup, the second line
* is used for linear searches (eg. clock algorithm scans). * is used for linear searches (eg. clock algorithm scans).
*
* TODO: make this structure smaller, it could be as small as 32 bytes.
*/ */
struct page { struct page {
page_flags_t flags; /* atomic flags, some possibly page_flags_t flags; /* Atomic flags, some possibly
updated asynchronously */ * updated asynchronously */
atomic_t _count; /* Usage count, see below. */ atomic_t _count; /* Usage count, see below. */
struct address_space *mapping; /* The inode (or ...) we belong to. */ unsigned int mapcount; /* Count of ptes mapped in mms,
pgoff_t index; /* Our offset within mapping. */ * to show when page is mapped
struct list_head lru; /* Pageout list, eg. active_list; * & limit reverse map searches,
protected by zone->lru_lock !! */ * protected by PG_maplock.
union { */
struct pte_chain *chain;/* Reverse pte mapping pointer.
* protected by PG_chainlock */
pte_addr_t direct;
unsigned int mapcount; /* Count ptes mapped into mms */
} pte;
unsigned long private; /* Mapping-private opaque data: unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads * usually used for buffer_heads
* if PagePrivate set; used for * if PagePrivate set; used for
* swp_entry_t if PageSwapCache * swp_entry_t if PageSwapCache
*/ */
struct address_space *mapping; /* The inode (or ...) we belong to. */
pgoff_t index; /* Our offset within mapping. */
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
/* /*
* On machines where all RAM is mapped into kernel address space, * On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with * we can simply calculate the virtual address. On machines with
...@@ -440,13 +436,11 @@ static inline pgoff_t page_index(struct page *page) ...@@ -440,13 +436,11 @@ static inline pgoff_t page_index(struct page *page)
} }
/* /*
* Return true if this page is mapped into pagetables. Subtle: test pte.direct * Return true if this page is mapped into pagetables.
* rather than pte.chain. Because sometimes pte.direct is 64-bit, and .chain
* is only 32-bit.
*/ */
static inline int page_mapped(struct page *page) static inline int page_mapped(struct page *page)
{ {
return page->pte.direct != 0; return page->mapcount != 0;
} }
/* /*
......
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
#define PG_nosave 14 /* Used for system suspend/resume */ #define PG_nosave 14 /* Used for system suspend/resume */
#define PG_maplock 15 /* Lock bit for rmap to ptes */ #define PG_maplock 15 /* Lock bit for rmap to ptes */
#define PG_direct 16 /* ->pte_chain points directly at pte */ #define PG_swapcache 16 /* Swap page: swp_entry_t in private */
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */ #define PG_reclaim 18 /* To be reclaimed asap */
#define PG_compound 19 /* Part of a compound page */ #define PG_compound 19 /* Part of a compound page */
#define PG_anon 20 /* Anonymous page: anon_vma in mapping*/
#define PG_swapcache 21 /* Swap page: swp_entry_t in private */ #define PG_anon 20 /* Anonymous page: anonmm in mapping */
/* /*
...@@ -281,12 +281,6 @@ extern void get_full_page_state(struct page_state *ret); ...@@ -281,12 +281,6 @@ extern void get_full_page_state(struct page_state *ret);
#define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags) #define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags)
#define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags) #define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags)
#define PageDirect(page) test_bit(PG_direct, &(page)->flags)
#define SetPageDirect(page) set_bit(PG_direct, &(page)->flags)
#define TestSetPageDirect(page) test_and_set_bit(PG_direct, &(page)->flags)
#define ClearPageDirect(page) clear_bit(PG_direct, &(page)->flags)
#define TestClearPageDirect(page) test_and_clear_bit(PG_direct, &(page)->flags)
#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
......
...@@ -15,21 +15,25 @@ ...@@ -15,21 +15,25 @@
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
struct pte_chain; void fastcall page_add_anon_rmap(struct page *,
struct pte_chain *pte_chain_alloc(int gfp_flags); struct mm_struct *, unsigned long addr);
void __pte_chain_free(struct pte_chain *pte_chain); void fastcall page_add_file_rmap(struct page *);
void fastcall page_remove_rmap(struct page *);
static inline void pte_chain_free(struct pte_chain *pte_chain)
/**
* page_dup_rmap - duplicate pte mapping to a page
* @page: the page to add the mapping to
*
* For copy_page_range only: minimal extract from page_add_rmap,
* avoiding unnecessary tests (already checked) so it's quicker.
*/
static inline void page_dup_rmap(struct page *page)
{ {
if (pte_chain) page_map_lock(page);
__pte_chain_free(pte_chain); page->mapcount++;
page_map_unlock(page);
} }
struct pte_chain * fastcall
page_add_rmap(struct page *, pte_t *, struct pte_chain *);
void fastcall page_add_file_rmap(struct page *);
void fastcall page_remove_rmap(struct page *, pte_t *);
/* /*
* Called from mm/vmscan.c to handle paging out * Called from mm/vmscan.c to handle paging out
*/ */
......
...@@ -84,7 +84,6 @@ extern void signals_init(void); ...@@ -84,7 +84,6 @@ extern void signals_init(void);
extern void buffer_init(void); extern void buffer_init(void);
extern void pidhash_init(void); extern void pidhash_init(void);
extern void pidmap_init(void); extern void pidmap_init(void);
extern void pte_chain_init(void);
extern void radix_tree_init(void); extern void radix_tree_init(void);
extern void free_initmem(void); extern void free_initmem(void);
extern void populate_rootfs(void); extern void populate_rootfs(void);
...@@ -460,7 +459,6 @@ asmlinkage void __init start_kernel(void) ...@@ -460,7 +459,6 @@ asmlinkage void __init start_kernel(void)
calibrate_delay(); calibrate_delay();
pidmap_init(); pidmap_init();
pgtable_cache_init(); pgtable_cache_init();
pte_chain_init();
#ifdef CONFIG_X86 #ifdef CONFIG_X86
if (efi_enabled) if (efi_enabled)
efi_enter_virtual_mode(); efi_enter_virtual_mode();
......
...@@ -36,7 +36,7 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -36,7 +36,7 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (!PageReserved(page)) { if (!PageReserved(page)) {
if (pte_dirty(pte)) if (pte_dirty(pte))
set_page_dirty(page); set_page_dirty(page);
page_remove_rmap(page, ptep); page_remove_rmap(page);
page_cache_release(page); page_cache_release(page);
mm->rss--; mm->rss--;
} }
......
...@@ -217,20 +217,10 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, ...@@ -217,20 +217,10 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long address = vma->vm_start; unsigned long address = vma->vm_start;
unsigned long end = vma->vm_end; unsigned long end = vma->vm_end;
unsigned long cow; unsigned long cow;
struct pte_chain *pte_chain = NULL;
if (is_vm_hugetlb_page(vma)) if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst, src, vma); return copy_hugetlb_page_range(dst, src, vma);
pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
if (!pte_chain) {
spin_unlock(&dst->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&dst->page_table_lock);
if (!pte_chain)
goto nomem;
}
cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
src_pgd = pgd_offset(src, address)-1; src_pgd = pgd_offset(src, address)-1;
dst_pgd = pgd_offset(dst, address)-1; dst_pgd = pgd_offset(dst, address)-1;
...@@ -329,35 +319,8 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; ...@@ -329,35 +319,8 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
pte = pte_mkold(pte); pte = pte_mkold(pte);
get_page(page); get_page(page);
dst->rss++; dst->rss++;
set_pte(dst_pte, pte); set_pte(dst_pte, pte);
if (PageAnon(page)) page_dup_rmap(page);
pte_chain = page_add_rmap(page,
dst_pte, pte_chain);
else
page_add_file_rmap(page);
if (pte_chain)
goto cont_copy_pte_range_noset;
pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
if (pte_chain)
goto cont_copy_pte_range_noset;
/*
* pte_chain allocation failed, and we need to
* run page reclaim.
*/
pte_unmap_nested(src_pte);
pte_unmap(dst_pte);
spin_unlock(&src->page_table_lock);
spin_unlock(&dst->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&dst->page_table_lock);
if (!pte_chain)
goto nomem;
spin_lock(&src->page_table_lock);
dst_pte = pte_offset_map(dst_pmd, address);
src_pte = pte_offset_map_nested(src_pmd,
address);
cont_copy_pte_range_noset: cont_copy_pte_range_noset:
address += PAGE_SIZE; address += PAGE_SIZE;
if (address >= end) { if (address >= end) {
...@@ -371,7 +334,7 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; ...@@ -371,7 +334,7 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
pte_unmap_nested(src_pte-1); pte_unmap_nested(src_pte-1);
pte_unmap(dst_pte-1); pte_unmap(dst_pte-1);
spin_unlock(&src->page_table_lock); spin_unlock(&src->page_table_lock);
cond_resched_lock(&dst->page_table_lock);
cont_copy_pmd_range: cont_copy_pmd_range:
src_pmd++; src_pmd++;
dst_pmd++; dst_pmd++;
...@@ -380,10 +343,8 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; ...@@ -380,10 +343,8 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
out_unlock: out_unlock:
spin_unlock(&src->page_table_lock); spin_unlock(&src->page_table_lock);
out: out:
pte_chain_free(pte_chain);
return 0; return 0;
nomem: nomem:
pte_chain_free(pte_chain);
return -ENOMEM; return -ENOMEM;
} }
...@@ -449,7 +410,7 @@ static void zap_pte_range(struct mmu_gather *tlb, ...@@ -449,7 +410,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
if (pte_young(pte) && page_mapping(page)) if (pte_young(pte) && page_mapping(page))
mark_page_accessed(page); mark_page_accessed(page);
tlb->freed++; tlb->freed++;
page_remove_rmap(page, ptep); page_remove_rmap(page);
tlb_remove_page(tlb, page); tlb_remove_page(tlb, page);
continue; continue;
} }
...@@ -1073,7 +1034,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -1073,7 +1034,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
{ {
struct page *old_page, *new_page; struct page *old_page, *new_page;
unsigned long pfn = pte_pfn(pte); unsigned long pfn = pte_pfn(pte);
struct pte_chain *pte_chain;
pte_t entry; pte_t entry;
if (unlikely(!pfn_valid(pfn))) { if (unlikely(!pfn_valid(pfn))) {
...@@ -1112,9 +1072,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -1112,9 +1072,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
page_cache_get(old_page); page_cache_get(old_page);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto no_pte_chain;
new_page = alloc_page(GFP_HIGHUSER); new_page = alloc_page(GFP_HIGHUSER);
if (!new_page) if (!new_page)
goto no_new_page; goto no_new_page;
...@@ -1128,10 +1085,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -1128,10 +1085,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
if (pte_same(*page_table, pte)) { if (pte_same(*page_table, pte)) {
if (PageReserved(old_page)) if (PageReserved(old_page))
++mm->rss; ++mm->rss;
page_remove_rmap(old_page, page_table); else
page_remove_rmap(old_page);
break_cow(vma, new_page, address, page_table); break_cow(vma, new_page, address, page_table);
pte_chain = page_add_rmap(new_page, page_table, pte_chain);
lru_cache_add_active(new_page); lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, mm, address);
/* Free the old page.. */ /* Free the old page.. */
new_page = old_page; new_page = old_page;
...@@ -1140,12 +1098,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, ...@@ -1140,12 +1098,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
page_cache_release(new_page); page_cache_release(new_page);
page_cache_release(old_page); page_cache_release(old_page);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return VM_FAULT_MINOR; return VM_FAULT_MINOR;
no_new_page: no_new_page:
pte_chain_free(pte_chain);
no_pte_chain:
page_cache_release(old_page); page_cache_release(old_page);
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
...@@ -1317,7 +1272,6 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1317,7 +1272,6 @@ static int do_swap_page(struct mm_struct * mm,
swp_entry_t entry = pte_to_swp_entry(orig_pte); swp_entry_t entry = pte_to_swp_entry(orig_pte);
pte_t pte; pte_t pte;
int ret = VM_FAULT_MINOR; int ret = VM_FAULT_MINOR;
struct pte_chain *pte_chain = NULL;
pte_unmap(page_table); pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
...@@ -1347,11 +1301,6 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1347,11 +1301,6 @@ static int do_swap_page(struct mm_struct * mm,
} }
mark_page_accessed(page); mark_page_accessed(page);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain) {
ret = VM_FAULT_OOM;
goto out;
}
lock_page(page); lock_page(page);
/* /*
...@@ -1383,14 +1332,13 @@ static int do_swap_page(struct mm_struct * mm, ...@@ -1383,14 +1332,13 @@ static int do_swap_page(struct mm_struct * mm,
flush_icache_page(vma, page); flush_icache_page(vma, page);
set_pte(page_table, pte); set_pte(page_table, pte);
pte_chain = page_add_rmap(page, page_table, pte_chain); page_add_anon_rmap(page, mm, address);
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte); update_mmu_cache(vma, address, pte);
pte_unmap(page_table); pte_unmap(page_table);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
out: out:
pte_chain_free(pte_chain);
return ret; return ret;
} }
...@@ -1406,19 +1354,6 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1406,19 +1354,6 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
{ {
pte_t entry; pte_t entry;
struct page * page = ZERO_PAGE(addr); struct page * page = ZERO_PAGE(addr);
struct pte_chain *pte_chain;
int ret;
pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
if (!pte_chain) {
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto no_mem;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, addr);
}
/* Read-only mapping of ZERO_PAGE. */ /* Read-only mapping of ZERO_PAGE. */
entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
...@@ -1441,7 +1376,6 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1441,7 +1376,6 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_unmap(page_table); pte_unmap(page_table);
page_cache_release(page); page_cache_release(page);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
ret = VM_FAULT_MINOR;
goto out; goto out;
} }
mm->rss++; mm->rss++;
...@@ -1450,24 +1384,19 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1450,24 +1384,19 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
vma); vma);
lru_cache_add_active(page); lru_cache_add_active(page);
mark_page_accessed(page); mark_page_accessed(page);
page_add_anon_rmap(page, mm, addr);
} }
set_pte(page_table, entry); set_pte(page_table, entry);
/* ignores ZERO_PAGE */
pte_chain = page_add_rmap(page, page_table, pte_chain);
pte_unmap(page_table); pte_unmap(page_table);
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry); update_mmu_cache(vma, addr, entry);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
ret = VM_FAULT_MINOR;
goto out;
no_mem:
ret = VM_FAULT_OOM;
out: out:
pte_chain_free(pte_chain); return VM_FAULT_MINOR;
return ret; no_mem:
return VM_FAULT_OOM;
} }
/* /*
...@@ -1489,7 +1418,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1489,7 +1418,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page * new_page; struct page * new_page;
struct address_space *mapping = NULL; struct address_space *mapping = NULL;
pte_t entry; pte_t entry;
struct pte_chain *pte_chain;
int sequence = 0; int sequence = 0;
int ret = VM_FAULT_MINOR; int ret = VM_FAULT_MINOR;
int anon = 0; int anon = 0;
...@@ -1514,10 +1442,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1514,10 +1442,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (new_page == NOPAGE_OOM) if (new_page == NOPAGE_OOM)
return VM_FAULT_OOM; return VM_FAULT_OOM;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto oom;
/* /*
* Should we do an early C-O-W break? * Should we do an early C-O-W break?
*/ */
...@@ -1542,7 +1466,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1542,7 +1466,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
sequence = atomic_read(&mapping->truncate_count); sequence = atomic_read(&mapping->truncate_count);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
page_cache_release(new_page); page_cache_release(new_page);
pte_chain_free(pte_chain);
goto retry; goto retry;
} }
page_table = pte_offset_map(pmd, address); page_table = pte_offset_map(pmd, address);
...@@ -1568,8 +1491,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1568,8 +1491,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
set_pte(page_table, entry); set_pte(page_table, entry);
if (anon) { if (anon) {
lru_cache_add_active(new_page); lru_cache_add_active(new_page);
pte_chain = page_add_rmap(new_page, page_add_anon_rmap(new_page, mm, address);
page_table, pte_chain);
} else } else
page_add_file_rmap(new_page); page_add_file_rmap(new_page);
pte_unmap(page_table); pte_unmap(page_table);
...@@ -1589,7 +1511,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1589,7 +1511,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_release(new_page); page_cache_release(new_page);
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
out: out:
pte_chain_free(pte_chain);
return ret; return ret;
} }
......
...@@ -81,7 +81,7 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) ...@@ -81,7 +81,7 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
static void static void
copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr, copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr,
pte_t *src, pte_t *dst, struct pte_chain **pte_chainp) unsigned long new_addr, pte_t *src, pte_t *dst)
{ {
pte_t pte = ptep_clear_flush(vma, old_addr, src); pte_t pte = ptep_clear_flush(vma, old_addr, src);
set_pte(dst, pte); set_pte(dst, pte);
...@@ -91,8 +91,8 @@ copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr, ...@@ -91,8 +91,8 @@ copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr,
if (pfn_valid(pfn)) { if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn); struct page *page = pfn_to_page(pfn);
if (PageAnon(page)) { if (PageAnon(page)) {
page_remove_rmap(page, src); page_remove_rmap(page);
*pte_chainp = page_add_rmap(page, dst, *pte_chainp); page_add_anon_rmap(page, vma->vm_mm, new_addr);
} }
} }
} }
...@@ -105,13 +105,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr, ...@@ -105,13 +105,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
int error = 0; int error = 0;
pte_t *src, *dst; pte_t *src, *dst;
struct pte_chain *pte_chain;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain) {
error = -ENOMEM;
goto out;
}
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
src = get_one_pte_map_nested(mm, old_addr); src = get_one_pte_map_nested(mm, old_addr);
if (src) { if (src) {
...@@ -133,8 +127,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr, ...@@ -133,8 +127,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
*/ */
if (src) { if (src) {
if (dst) if (dst)
copy_one_pte(vma, old_addr, src, copy_one_pte(vma, old_addr, new_addr, src, dst);
dst, &pte_chain);
else else
error = -ENOMEM; error = -ENOMEM;
pte_unmap_nested(src); pte_unmap_nested(src);
...@@ -143,8 +136,6 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr, ...@@ -143,8 +136,6 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
pte_unmap(dst); pte_unmap(dst);
} }
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
out:
return error; return error;
} }
......
...@@ -572,10 +572,6 @@ unsigned long get_unmapped_area(struct file *file, unsigned long addr, ...@@ -572,10 +572,6 @@ unsigned long get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM; return -ENOMEM;
} }
void pte_chain_init(void)
{
}
void swap_unplug_io_fn(struct backing_dev_info *) void swap_unplug_io_fn(struct backing_dev_info *)
{ {
} }
...@@ -4,17 +4,14 @@ ...@@ -4,17 +4,14 @@
* Copyright 2001, Rik van Riel <riel@conectiva.com.br> * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
* Released under the General Public License (GPL). * Released under the General Public License (GPL).
* *
* * Simple, low overhead reverse mapping scheme.
* Simple, low overhead pte-based reverse mapping scheme. * Please try to keep this thing as modular as possible.
* This is kept modular because we may want to experiment
* with object-based reverse mapping schemes. Please try
* to keep this thing as modular as possible.
*/ */
/* /*
* Locking: * Locking:
* - the page->pte.chain is protected by the PG_maplock bit, * - the page->mapcount field is protected by the PG_maplock bit,
* which nests within the the mm->page_table_lock, * which nests within the mm->page_table_lock,
* which nests within the page lock. * which nests within the page lock.
* - because swapout locking is opposite to the locking order * - because swapout locking is opposite to the locking order
* in the page fault path, the swapout path uses trylocks * in the page fault path, the swapout path uses trylocks
...@@ -27,88 +24,15 @@ ...@@ -27,88 +24,15 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/rmap.h> #include <linux/rmap.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <asm/pgalloc.h>
#include <asm/rmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
/*
* Something oopsable to put for now in the page->mapping
* of an anonymous page, to test that it is ignored.
*/
#define ANON_MAPPING_DEBUG ((struct address_space *) 0)
static inline void clear_page_anon(struct page *page) static inline void clear_page_anon(struct page *page)
{ {
BUG_ON(page->mapping != ANON_MAPPING_DEBUG);
page->mapping = NULL; page->mapping = NULL;
ClearPageAnon(page); ClearPageAnon(page);
} }
/*
* Shared pages have a chain of pte_chain structures, used to locate
* all the mappings to this page. We only need a pointer to the pte
* here, the page struct for the page table page contains the process
* it belongs to and the offset within that process.
*
* We use an array of pte pointers in this structure to minimise cache misses
* while traversing reverse maps.
*/
#define NRPTE ((L1_CACHE_BYTES - sizeof(unsigned long))/sizeof(pte_addr_t))
/*
* next_and_idx encodes both the address of the next pte_chain and the
* offset of the lowest-index used pte in ptes[] (which is equal also
* to the offset of the highest-index unused pte in ptes[], plus one).
*/
struct pte_chain {
unsigned long next_and_idx;
pte_addr_t ptes[NRPTE];
} ____cacheline_aligned;
kmem_cache_t *pte_chain_cache;
static inline struct pte_chain *pte_chain_next(struct pte_chain *pte_chain)
{
return (struct pte_chain *)(pte_chain->next_and_idx & ~NRPTE);
}
static inline struct pte_chain *pte_chain_ptr(unsigned long pte_chain_addr)
{
return (struct pte_chain *)(pte_chain_addr & ~NRPTE);
}
static inline int pte_chain_idx(struct pte_chain *pte_chain)
{
return pte_chain->next_and_idx & NRPTE;
}
static inline unsigned long
pte_chain_encode(struct pte_chain *pte_chain, int idx)
{
return (unsigned long)pte_chain | idx;
}
/*
* pte_chain list management policy:
*
* - If a page has a pte_chain list then it is shared by at least two processes,
* because a single sharing uses PageDirect. (Well, this isn't true yet,
* coz this code doesn't collapse singletons back to PageDirect on the remove
* path).
* - A pte_chain list has free space only in the head member - all succeeding
* members are 100% full.
* - If the head element has free space, it occurs in its leading slots.
* - All free space in the pte_chain is at the start of the head member.
* - Insertion into the pte_chain puts a pte pointer in the last free slot of
* the head member.
* - Removal from a pte chain moves the head pte of the head member onto the
* victim pte and frees the head member if it became empty.
*/
/** /**
** VM stuff below this comment ** VM stuff below this comment
**/ **/
...@@ -126,6 +50,11 @@ unsigned long vma_address(struct vm_area_struct *vma, pgoff_t pgoff) ...@@ -126,6 +50,11 @@ unsigned long vma_address(struct vm_area_struct *vma, pgoff_t pgoff)
address: -EFAULT; address: -EFAULT;
} }
/**
** Subfunctions of page_referenced: page_referenced_one called
** repeatedly from either page_referenced_anon or page_referenced_file.
**/
static int page_referenced_one(struct page *page, static int page_referenced_one(struct page *page,
struct mm_struct *mm, unsigned long address, struct mm_struct *mm, unsigned long address,
unsigned int *mapcount, int *failed) unsigned int *mapcount, int *failed)
...@@ -172,6 +101,11 @@ static int page_referenced_one(struct page *page, ...@@ -172,6 +101,11 @@ static int page_referenced_one(struct page *page,
return referenced; return referenced;
} }
static inline int page_referenced_anon(struct page *page)
{
return 1; /* until next patch */
}
/** /**
* page_referenced_file - referenced check for object-based rmap * page_referenced_file - referenced check for object-based rmap
* @page: the page we're checking references on. * @page: the page we're checking references on.
...@@ -188,7 +122,7 @@ static int page_referenced_one(struct page *page, ...@@ -188,7 +122,7 @@ static int page_referenced_one(struct page *page,
*/ */
static inline int page_referenced_file(struct page *page) static inline int page_referenced_file(struct page *page)
{ {
unsigned int mapcount = page->pte.mapcount; unsigned int mapcount = page->mapcount;
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -247,15 +181,11 @@ static inline int page_referenced_file(struct page *page) ...@@ -247,15 +181,11 @@ static inline int page_referenced_file(struct page *page)
* @page: the page to test * @page: the page to test
* *
* Quick test_and_clear_referenced for all mappings to a page, * Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page. * returns the number of ptes which referenced the page.
* Caller needs to hold the rmap lock. * Caller needs to hold the rmap lock.
*
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/ */
int fastcall page_referenced(struct page * page) int fastcall page_referenced(struct page *page)
{ {
struct pte_chain *pc;
int referenced = 0; int referenced = 0;
if (page_test_and_clear_young(page)) if (page_test_and_clear_young(page))
...@@ -264,97 +194,38 @@ int fastcall page_referenced(struct page * page) ...@@ -264,97 +194,38 @@ int fastcall page_referenced(struct page * page)
if (TestClearPageReferenced(page)) if (TestClearPageReferenced(page))
referenced++; referenced++;
if (!PageAnon(page)) { if (page->mapcount && page->mapping) {
if (page_mapped(page) && page->mapping) if (PageAnon(page))
referenced += page_referenced_anon(page);
else
referenced += page_referenced_file(page); referenced += page_referenced_file(page);
} else if (PageDirect(page)) {
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
referenced++;
rmap_ptep_unmap(pte);
} else {
int nr_chains = 0;
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
int i;
for (i = pte_chain_idx(pc); i < NRPTE; i++) {
pte_addr_t pte_paddr = pc->ptes[i];
pte_t *p;
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
rmap_ptep_unmap(p);
nr_chains++;
}
}
if (nr_chains == 1) {
pc = page->pte.chain;
page->pte.direct = pc->ptes[NRPTE-1];
SetPageDirect(page);
pc->ptes[NRPTE-1] = 0;
__pte_chain_free(pc);
}
} }
return referenced; return referenced;
} }
/** /**
* page_add_rmap - add reverse mapping entry to an anonymous page * page_add_anon_rmap - add pte mapping to an anonymous page
* @page: the page to add the mapping to * @page: the page to add the mapping to
* @ptep: the page table entry mapping this page * @mm: the mm in which the mapping is added
* @address: the user virtual address mapped
* *
* Add a new pte reverse mapping to a page.
* The caller needs to hold the mm->page_table_lock. * The caller needs to hold the mm->page_table_lock.
*/ */
struct pte_chain * fastcall void fastcall page_add_anon_rmap(struct page *page,
page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain) struct mm_struct *mm, unsigned long address)
{ {
pte_addr_t pte_paddr = ptep_to_paddr(ptep); BUG_ON(PageReserved(page));
struct pte_chain *cur_pte_chain;
if (PageReserved(page))
return pte_chain;
page_map_lock(page); page_map_lock(page);
if (!page->mapcount) {
if (page->pte.direct == 0) { BUG_ON(page->mapping);
page->pte.direct = pte_paddr;
SetPageDirect(page);
SetPageAnon(page); SetPageAnon(page);
page->mapping = ANON_MAPPING_DEBUG; page->index = address & PAGE_MASK;
page->mapping = (void *) mm; /* until next patch */
inc_page_state(nr_mapped); inc_page_state(nr_mapped);
goto out;
}
if (PageDirect(page)) {
/* Convert a direct pointer into a pte_chain */
ClearPageDirect(page);
pte_chain->ptes[NRPTE-1] = page->pte.direct;
pte_chain->ptes[NRPTE-2] = pte_paddr;
pte_chain->next_and_idx = pte_chain_encode(NULL, NRPTE-2);
page->pte.direct = 0;
page->pte.chain = pte_chain;
pte_chain = NULL; /* We consumed it */
goto out;
} }
page->mapcount++;
cur_pte_chain = page->pte.chain;
if (cur_pte_chain->ptes[0]) { /* It's full */
pte_chain->next_and_idx = pte_chain_encode(cur_pte_chain,
NRPTE - 1);
page->pte.chain = pte_chain;
pte_chain->ptes[NRPTE-1] = pte_paddr;
pte_chain = NULL; /* We consumed it */
goto out;
}
cur_pte_chain->ptes[pte_chain_idx(cur_pte_chain) - 1] = pte_paddr;
cur_pte_chain->next_and_idx--;
out:
page_map_unlock(page); page_map_unlock(page);
return pte_chain;
} }
/** /**
...@@ -370,160 +241,39 @@ void fastcall page_add_file_rmap(struct page *page) ...@@ -370,160 +241,39 @@ void fastcall page_add_file_rmap(struct page *page)
return; return;
page_map_lock(page); page_map_lock(page);
if (!page_mapped(page)) if (!page->mapcount)
inc_page_state(nr_mapped); inc_page_state(nr_mapped);
page->pte.mapcount++; page->mapcount++;
page_map_unlock(page); page_map_unlock(page);
} }
/** /**
* page_remove_rmap - take down reverse mapping to a page * page_remove_rmap - take down pte mapping from a page
* @page: page to remove mapping from * @page: page to remove mapping from
* @ptep: page table entry to remove
* *
* Removes the reverse mapping from the pte_chain of the page,
* after that the caller can clear the page table entry and free
* the page.
* Caller needs to hold the mm->page_table_lock. * Caller needs to hold the mm->page_table_lock.
*/ */
void fastcall page_remove_rmap(struct page *page, pte_t *ptep) void fastcall page_remove_rmap(struct page *page)
{ {
pte_addr_t pte_paddr = ptep_to_paddr(ptep); BUG_ON(PageReserved(page));
struct pte_chain *pc; BUG_ON(!page->mapcount);
if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
return;
page_map_lock(page); page_map_lock(page);
page->mapcount--;
if (!page_mapped(page)) if (!page->mapcount) {
goto out_unlock; /* remap_page_range() from a driver? */
if (!PageAnon(page)) {
page->pte.mapcount--;
} else if (PageDirect(page)) {
if (page->pte.direct == pte_paddr) {
page->pte.direct = 0;
ClearPageDirect(page);
goto out;
}
} else {
struct pte_chain *start = page->pte.chain;
struct pte_chain *next;
int victim_i = pte_chain_idx(start);
for (pc = start; pc; pc = next) {
int i;
next = pte_chain_next(pc);
if (next)
prefetch(next);
for (i = pte_chain_idx(pc); i < NRPTE; i++) {
pte_addr_t pa = pc->ptes[i];
if (pa != pte_paddr)
continue;
pc->ptes[i] = start->ptes[victim_i];
start->ptes[victim_i] = 0;
if (victim_i == NRPTE-1) {
/* Emptied a pte_chain */
page->pte.chain = pte_chain_next(start);
__pte_chain_free(start);
} else {
start->next_and_idx++;
}
goto out;
}
}
}
out:
if (!page_mapped(page)) {
if (page_test_and_clear_dirty(page)) if (page_test_and_clear_dirty(page))
set_page_dirty(page); set_page_dirty(page);
if (PageAnon(page)) if (PageAnon(page))
clear_page_anon(page); clear_page_anon(page);
dec_page_state(nr_mapped); dec_page_state(nr_mapped);
} }
out_unlock:
page_map_unlock(page); page_map_unlock(page);
} }
/** /**
* try_to_unmap_anon_one - worker function for try_to_unmap ** Subfunctions of try_to_unmap: try_to_unmap_one called
* @page: page to unmap ** repeatedly from either try_to_unmap_anon or try_to_unmap_file.
* @ptep: page table entry to unmap from page **/
*
* Internal helper function for try_to_unmap, called for each page
* table entry mapping a page. Because locking order here is opposite
* to the locking order used by the page fault path, we use trylocks.
* Locking:
* page lock shrink_list(), trylock
* rmap lock shrink_list()
* mm->page_table_lock try_to_unmap_one(), trylock
*/
static int fastcall try_to_unmap_anon_one(struct page * page, pte_addr_t paddr)
{
pte_t *ptep = rmap_ptep_map(paddr);
unsigned long address = ptep_to_address(ptep);
struct mm_struct * mm = ptep_to_mm(ptep);
struct vm_area_struct * vma;
pte_t pte;
int ret;
if (!mm)
BUG();
/*
* We need the page_table_lock to protect us from page faults,
* munmap, fork, etc...
*/
if (!spin_trylock(&mm->page_table_lock)) {
rmap_ptep_unmap(ptep);
return SWAP_AGAIN;
}
/* unmap_vmas drops page_table_lock with vma unlinked */
vma = find_vma(mm, address);
if (!vma) {
ret = SWAP_FAIL;
goto out_unlock;
}
/* The page is mlock()d, we cannot swap it out. */
if (vma->vm_flags & VM_LOCKED) {
ret = SWAP_FAIL;
goto out_unlock;
}
/* Nuke the page table entry. */
flush_cache_page(vma, address);
pte = ptep_clear_flush(vma, address, ptep);
{
swp_entry_t entry = { .val = page->private };
/*
* Store the swap location in the pte.
* See handle_pte_fault() ...
*/
BUG_ON(!PageSwapCache(page));
swap_duplicate(entry);
set_pte(ptep, swp_entry_to_pte(entry));
BUG_ON(pte_file(*ptep));
}
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pte))
set_page_dirty(page);
mm->rss--;
page_cache_release(page);
ret = SWAP_SUCCESS;
out_unlock:
rmap_ptep_unmap(ptep);
spin_unlock(&mm->page_table_lock);
return ret;
}
static int try_to_unmap_one(struct page *page, static int try_to_unmap_one(struct page *page,
struct mm_struct *mm, unsigned long address, struct mm_struct *mm, unsigned long address,
...@@ -579,8 +329,8 @@ static int try_to_unmap_one(struct page *page, ...@@ -579,8 +329,8 @@ static int try_to_unmap_one(struct page *page,
set_page_dirty(page); set_page_dirty(page);
mm->rss--; mm->rss--;
BUG_ON(!page->pte.mapcount); BUG_ON(!page->mapcount);
page->pte.mapcount--; page->mapcount--;
page_cache_release(page); page_cache_release(page);
out_unmap: out_unmap:
...@@ -683,7 +433,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor, ...@@ -683,7 +433,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor,
if (pte_dirty(pteval)) if (pte_dirty(pteval))
set_page_dirty(page); set_page_dirty(page);
page_remove_rmap(page, pte); page_remove_rmap(page);
page_cache_release(page); page_cache_release(page);
mm->rss--; mm->rss--;
(*mapcount)--; (*mapcount)--;
...@@ -696,6 +446,11 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor, ...@@ -696,6 +446,11 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor,
return SWAP_AGAIN; return SWAP_AGAIN;
} }
static inline int try_to_unmap_anon(struct page *page)
{
return SWAP_FAIL; /* until next patch */
}
/** /**
* try_to_unmap_file - unmap file page using the object-based rmap method * try_to_unmap_file - unmap file page using the object-based rmap method
* @page: the page to unmap * @page: the page to unmap
...@@ -710,7 +465,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor, ...@@ -710,7 +465,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor,
*/ */
static inline int try_to_unmap_file(struct page *page) static inline int try_to_unmap_file(struct page *page)
{ {
unsigned int mapcount = page->pte.mapcount; unsigned int mapcount = page->mapcount;
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -835,73 +590,20 @@ static inline int try_to_unmap_file(struct page *page) ...@@ -835,73 +590,20 @@ static inline int try_to_unmap_file(struct page *page)
* SWAP_AGAIN - we missed a trylock, try again later * SWAP_AGAIN - we missed a trylock, try again later
* SWAP_FAIL - the page is unswappable * SWAP_FAIL - the page is unswappable
*/ */
int fastcall try_to_unmap(struct page * page) int fastcall try_to_unmap(struct page *page)
{ {
struct pte_chain *pc, *next_pc, *start; int ret;
int ret = SWAP_SUCCESS;
int victim_i;
BUG_ON(PageReserved(page)); BUG_ON(PageReserved(page));
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
BUG_ON(!page_mapped(page)); BUG_ON(!page->mapcount);
if (!PageAnon(page)) { if (PageAnon(page))
ret = try_to_unmap_anon(page);
else
ret = try_to_unmap_file(page); ret = try_to_unmap_file(page);
goto out;
}
if (PageDirect(page)) { if (!page->mapcount) {
ret = try_to_unmap_anon_one(page, page->pte.direct);
if (ret == SWAP_SUCCESS) {
page->pte.direct = 0;
ClearPageDirect(page);
}
goto out;
}
start = page->pte.chain;
victim_i = pte_chain_idx(start);
for (pc = start; pc; pc = next_pc) {
int i;
next_pc = pte_chain_next(pc);
if (next_pc)
prefetch(next_pc);
for (i = pte_chain_idx(pc); i < NRPTE; i++) {
pte_addr_t pte_paddr = pc->ptes[i];
switch (try_to_unmap_anon_one(page, pte_paddr)) {
case SWAP_SUCCESS:
/*
* Release a slot. If we're releasing the
* first pte in the first pte_chain then
* pc->ptes[i] and start->ptes[victim_i] both
* refer to the same thing. It works out.
*/
pc->ptes[i] = start->ptes[victim_i];
start->ptes[victim_i] = 0;
victim_i++;
if (victim_i == NRPTE) {
page->pte.chain = pte_chain_next(start);
__pte_chain_free(start);
start = page->pte.chain;
victim_i = 0;
} else {
start->next_and_idx++;
}
break;
case SWAP_AGAIN:
/* Skip this pte, remembering status. */
ret = SWAP_AGAIN;
continue;
case SWAP_FAIL:
ret = SWAP_FAIL;
goto out;
}
}
}
out:
if (!page_mapped(page)) {
if (page_test_and_clear_dirty(page)) if (page_test_and_clear_dirty(page))
set_page_dirty(page); set_page_dirty(page);
if (PageAnon(page)) if (PageAnon(page))
...@@ -911,73 +613,3 @@ int fastcall try_to_unmap(struct page * page) ...@@ -911,73 +613,3 @@ int fastcall try_to_unmap(struct page * page)
} }
return ret; return ret;
} }
/**
** No more VM stuff below this comment, only pte_chain helper
** functions.
**/
static void pte_chain_ctor(void *p, kmem_cache_t *cachep, unsigned long flags)
{
struct pte_chain *pc = p;
memset(pc, 0, sizeof(*pc));
}
DEFINE_PER_CPU(struct pte_chain *, local_pte_chain) = 0;
/**
* __pte_chain_free - free pte_chain structure
* @pte_chain: pte_chain struct to free
*/
void __pte_chain_free(struct pte_chain *pte_chain)
{
struct pte_chain **pte_chainp;
pte_chainp = &get_cpu_var(local_pte_chain);
if (pte_chain->next_and_idx)
pte_chain->next_and_idx = 0;
if (*pte_chainp)
kmem_cache_free(pte_chain_cache, *pte_chainp);
*pte_chainp = pte_chain;
put_cpu_var(local_pte_chain);
}
/*
* pte_chain_alloc(): allocate a pte_chain structure for use by page_add_rmap().
*
* The caller of page_add_rmap() must perform the allocation because
* page_add_rmap() is invariably called under spinlock. Often, page_add_rmap()
* will not actually use the pte_chain, because there is space available in one
* of the existing pte_chains which are attached to the page. So the case of
* allocating and then freeing a single pte_chain is specially optimised here,
* with a one-deep per-cpu cache.
*/
struct pte_chain *pte_chain_alloc(int gfp_flags)
{
struct pte_chain *ret;
struct pte_chain **pte_chainp;
might_sleep_if(gfp_flags & __GFP_WAIT);
pte_chainp = &get_cpu_var(local_pte_chain);
if (*pte_chainp) {
ret = *pte_chainp;
*pte_chainp = NULL;
put_cpu_var(local_pte_chain);
} else {
put_cpu_var(local_pte_chain);
ret = kmem_cache_alloc(pte_chain_cache, gfp_flags);
}
return ret;
}
void __init pte_chain_init(void)
{
pte_chain_cache = kmem_cache_create( "pte_chain",
sizeof(struct pte_chain),
sizeof(struct pte_chain),
SLAB_PANIC,
pte_chain_ctor,
NULL);
}
...@@ -427,19 +427,19 @@ void free_swap_and_cache(swp_entry_t entry) ...@@ -427,19 +427,19 @@ void free_swap_and_cache(swp_entry_t entry)
/* vma->vm_mm->page_table_lock is held */ /* vma->vm_mm->page_table_lock is held */
static void static void
unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) swp_entry_t entry, struct page *page)
{ {
vma->vm_mm->rss++; vma->vm_mm->rss++;
get_page(page); get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
*pte_chainp = page_add_rmap(page, dir, *pte_chainp); page_add_anon_rmap(page, vma->vm_mm, address);
swap_free(entry); swap_free(entry);
} }
/* vma->vm_mm->page_table_lock is held */ /* vma->vm_mm->page_table_lock is held */
static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset, unsigned long address, unsigned long size, unsigned long offset,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) swp_entry_t entry, struct page *page)
{ {
pte_t * pte; pte_t * pte;
unsigned long end; unsigned long end;
...@@ -464,8 +464,7 @@ static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, ...@@ -464,8 +464,7 @@ static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
* Test inline before going to call unuse_pte. * Test inline before going to call unuse_pte.
*/ */
if (unlikely(pte_same(*pte, swp_pte))) { if (unlikely(pte_same(*pte, swp_pte))) {
unuse_pte(vma, offset + address, pte, unuse_pte(vma, offset + address, pte, entry, page);
entry, page, pte_chainp);
pte_unmap(pte); pte_unmap(pte);
return 1; return 1;
} }
...@@ -479,7 +478,7 @@ static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, ...@@ -479,7 +478,7 @@ static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
/* vma->vm_mm->page_table_lock is held */ /* vma->vm_mm->page_table_lock is held */
static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size, unsigned long address, unsigned long size,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) swp_entry_t entry, struct page *page)
{ {
pmd_t * pmd; pmd_t * pmd;
unsigned long offset, end; unsigned long offset, end;
...@@ -501,7 +500,7 @@ static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, ...@@ -501,7 +500,7 @@ static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
BUG(); BUG();
do { do {
if (unuse_pmd(vma, pmd, address, end - address, if (unuse_pmd(vma, pmd, address, end - address,
offset, entry, page, pte_chainp)) offset, entry, page))
return 1; return 1;
address = (address + PMD_SIZE) & PMD_MASK; address = (address + PMD_SIZE) & PMD_MASK;
pmd++; pmd++;
...@@ -511,15 +510,14 @@ static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, ...@@ -511,15 +510,14 @@ static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
/* vma->vm_mm->page_table_lock is held */ /* vma->vm_mm->page_table_lock is held */
static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) swp_entry_t entry, struct page *page)
{ {
unsigned long start = vma->vm_start, end = vma->vm_end; unsigned long start = vma->vm_start, end = vma->vm_end;
if (start >= end) if (start >= end)
BUG(); BUG();
do { do {
if (unuse_pgd(vma, pgdir, start, end - start, if (unuse_pgd(vma, pgdir, start, end - start, entry, page))
entry, page, pte_chainp))
return 1; return 1;
start = (start + PGDIR_SIZE) & PGDIR_MASK; start = (start + PGDIR_SIZE) & PGDIR_MASK;
pgdir++; pgdir++;
...@@ -531,11 +529,6 @@ static int unuse_process(struct mm_struct * mm, ...@@ -531,11 +529,6 @@ static int unuse_process(struct mm_struct * mm,
swp_entry_t entry, struct page* page) swp_entry_t entry, struct page* page)
{ {
struct vm_area_struct* vma; struct vm_area_struct* vma;
struct pte_chain *pte_chain;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
return -ENOMEM;
/* /*
* Go through process' page directory. * Go through process' page directory.
...@@ -543,11 +536,10 @@ static int unuse_process(struct mm_struct * mm, ...@@ -543,11 +536,10 @@ static int unuse_process(struct mm_struct * mm,
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) { for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start); pgd_t * pgd = pgd_offset(mm, vma->vm_start);
if (unuse_vma(vma, pgd, entry, page, &pte_chain)) if (unuse_vma(vma, pgd, entry, page))
break; break;
} }
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment