Commit 02eaba7f authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] fix swapcache packing in the radix tree

First some terminology: this patch introduces a kernel-wide `pgoff_t'
type.  It is the index of a page into the pagecache.  The thing at
page->index.  For most mappings it is also the offset of the page into
that mapping.  This type has a very distinct function in the kernel and
it needs a name.  I don't have any particular plans to go and migrate
everything so we can support 64-bit pagecache indices on x86, but this
would be the way to do it.

This patch improves the packing density of swapcache pages in the radix
tree.

A swapcache page is identified by the `swap type' (indexes the swap
device) and the `offset' (into that swap device).  These two numbers
are encoded into a `swp_entry_t' machine word in arch-specific code
because the resulting number is placed into pagetables in a form which
will generate a fault.

The kernel also needs to generate a pgoff_t for that page to index it
into the swapper_space radix tree.  That pgoff_t is usually
bitwise-identical to the swp_entry_t.  That worked OK when the
pagecache was using a hash.  But with a radix tree, it produces
catastrophically bad results.

x86 (and many other architectures) place the `type' field into the
low-order bits of the swp_entry_t.  So *all* swapcache pages are
basically identical in the eight low-order bits.  This produces a very
sparse radix tree for swapcache.  I'm observing packing densities of 1%
to 2%: so the typical 128-slot radix tree node has only one or two
pages in it.

The end result is that the kernel needs to allocate approximately one
new radix-tree node for each page which is added to the swapcache.  So
no wonder we're having radix-tree node exhaustion during swapout!
(It's actually quite encouraging that the kernel works as well as it
does).

The patch changes the encoding of the swp_entry_t so that its
most-significant bits contain the `type' field and the
least-significant bits contain the `offset' field, right-aligned.

That is: the encoding in swp_entry_t is now arch-independent.  The new
file <linux/swapops.h> has conversion functions which convert the
swp_entry_t to and from its machine pte representation.

Packing density in the swapper_space mapping goes up to around 90%
(observed) and the kernel is tons happier under swap load.


An alternative approach would be to create new conversion functions
which convert an arch-specific swp_entry_t to and from a pgoff_t.  I
tried that.  It worked, but I liked it less.
parent 0f2b38d5
...@@ -315,7 +315,7 @@ void __init atari_stram_reserve_pages(void *start_mem) ...@@ -315,7 +315,7 @@ void __init atari_stram_reserve_pages(void *start_mem)
otherwise just use the end of kernel data (= start_mem) */ otherwise just use the end of kernel data (= start_mem) */
swap_start = !kernel_in_stram ? stram_start + PAGE_SIZE : start_mem; swap_start = !kernel_in_stram ? stram_start + PAGE_SIZE : start_mem;
/* decrement by one page, rest of kernel assumes that first swap page /* decrement by one page, rest of kernel assumes that first swap page
* is always reserved and maybe doesn't handle SWP_ENTRY == 0 * is always reserved and maybe doesn't handle swp_entry == 0
* correctly */ * correctly */
swap_start -= PAGE_SIZE; swap_start -= PAGE_SIZE;
swap_end = stram_end; swap_end = stram_end;
...@@ -749,7 +749,7 @@ static int unswap_by_read(unsigned short *map, unsigned long max, ...@@ -749,7 +749,7 @@ static int unswap_by_read(unsigned short *map, unsigned long max,
} }
if (map[i]) { if (map[i]) {
entry = SWP_ENTRY(stram_swap_type, i); entry = swp_entry(stram_swap_type, i);
DPRINTK("unswap: map[i=%lu]=%u nr_swap=%u\n", DPRINTK("unswap: map[i=%lu]=%u nr_swap=%u\n",
i, map[i], nr_swap_pages); i, map[i], nr_swap_pages);
......
...@@ -340,11 +340,11 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, ...@@ -340,11 +340,11 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; } { pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; }
#define SWP_TYPE(x) (((x).val >> 32) & 0xff) #define __swp_type(x) (((x).val >> 32) & 0xff)
#define SWP_OFFSET(x) ((x).val >> 40) #define __swp_offset(x) ((x).val >> 40)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) #define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#ifndef CONFIG_DISCONTIGMEM #ifndef CONFIG_DISCONTIGMEM
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
......
...@@ -142,11 +142,11 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; ...@@ -142,11 +142,11 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
* *
* We support up to 32GB of swap on 4k machines * We support up to 32GB of swap on 4k machines
*/ */
#define SWP_TYPE(x) (((x).val >> 2) & 0x7f) #define __swp_type(x) (((x).val >> 2) & 0x7f)
#define SWP_OFFSET(x) ((x).val >> 9) #define __swp_offset(x) ((x).val >> 9)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 9) }) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 9) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
/* FIXME: this is not correct */ /* FIXME: this is not correct */
......
...@@ -500,11 +500,11 @@ static inline void update_mmu_cache(struct vm_area_struct * vma, ...@@ -500,11 +500,11 @@ static inline void update_mmu_cache(struct vm_area_struct * vma,
/* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
/* Since the PAGE_PRESENT bit is bit 4, we can use the bits above */ /* Since the PAGE_PRESENT bit is bit 4, we can use the bits above */
#define SWP_TYPE(x) (((x).val >> 5) & 0x7f) #define __swp_type(x) (((x).val >> 5) & 0x7f)
#define SWP_OFFSET(x) ((x).val >> 12) #define __swp_offset(x) ((x).val >> 12)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 5) | ((offset) << 12) }) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 5) | ((offset) << 12) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
......
...@@ -269,11 +269,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -269,11 +269,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define update_mmu_cache(vma,address,pte) do { } while (0) #define update_mmu_cache(vma,address,pte) do { } while (0)
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) #define __swp_type(x) (((x).val >> 1) & 0x3f)
#define SWP_OFFSET(x) ((x).val >> 8) #define __swp_offset(x) ((x).val >> 8)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -402,11 +402,11 @@ pte_same (pte_t a, pte_t b) ...@@ -402,11 +402,11 @@ pte_same (pte_t a, pte_t b)
extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void paging_init (void); extern void paging_init (void);
#define SWP_TYPE(entry) (((entry).val >> 1) & 0xff) #define __swp_type(entry) (((entry).val >> 1) & 0xff)
#define SWP_OFFSET(entry) (((entry).val << 1) >> 10) #define __swp_offset(entry) (((entry).val << 1) >> 10)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 9) }) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 9) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define io_remap_page_range remap_page_range /* XXX is this right? */ #define io_remap_page_range remap_page_range /* XXX is this right? */
......
...@@ -145,20 +145,20 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, ...@@ -145,20 +145,20 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
#ifdef CONFIG_SUN3 #ifdef CONFIG_SUN3
/* Macros to (de)construct the fake PTEs representing swap pages. */ /* Macros to (de)construct the fake PTEs representing swap pages. */
#define SWP_TYPE(x) ((x).val & 0x7F) #define __swp_type(x) ((x).val & 0x7F)
#define SWP_OFFSET(x) (((x).val) >> 7) #define __swp_offset(x) (((x).val) >> 7)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) | ((offset) << 7)) }) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) | ((offset) << 7)) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#else #else
/* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
#define SWP_TYPE(x) (((x).val >> 1) & 0xff) #define __swp_type(x) (((x).val >> 1) & 0xff)
#define SWP_OFFSET(x) ((x).val >> 10) #define __swp_offset(x) ((x).val >> 10)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 10) }) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 10) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* CONFIG_SUN3 */ #endif /* CONFIG_SUN3 */
......
...@@ -493,12 +493,11 @@ extern void paging_init(void); ...@@ -493,12 +493,11 @@ extern void paging_init(void);
extern void update_mmu_cache(struct vm_area_struct *vma, extern void update_mmu_cache(struct vm_area_struct *vma,
unsigned long address, pte_t pte); unsigned long address, pte_t pte);
#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) #define __swp_type(x) (((x).val >> 1) & 0x3f)
#define SWP_OFFSET(x) ((x).val >> 8) #define __swp_offset(x) ((x).val >> 8)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
......
...@@ -553,11 +553,11 @@ extern void (*update_mmu_cache)(struct vm_area_struct *vma, ...@@ -553,11 +553,11 @@ extern void (*update_mmu_cache)(struct vm_area_struct *vma,
extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; } { pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; }
#define SWP_TYPE(x) (((x).val >> 32) & 0xff) #define __swp_type(x) (((x).val >> 32) & 0xff)
#define SWP_OFFSET(x) ((x).val >> 40) #define __swp_offset(x) ((x).val >> 40)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#ifndef CONFIG_DISCONTIGMEM #ifndef CONFIG_DISCONTIGMEM
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
......
...@@ -312,14 +312,14 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, ...@@ -312,14 +312,14 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(x) ((x).val & 0x3f) #define __swp_type(x) ((x).val & 0x3f)
#define SWP_OFFSET(x) ( (((x).val >> 6) & 0x7) | \ #define __swp_offset(x) ( (((x).val >> 6) & 0x7) | \
(((x).val >> 7) & ~0x7) ) (((x).val >> 7) & ~0x7) )
#define SWP_ENTRY(type, offset) ((swp_entry_t) { (type) | \ #define __swp_entry(type, offset) ((swp_entry_t) { (type) | \
((offset & 0x7) << 6) | \ ((offset & 0x7) << 6) | \
((offset & ~0x7) << 7) }) ((offset & ~0x7) << 7) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define module_map vmalloc #define module_map vmalloc
#define module_unmap vfree #define module_unmap vfree
......
...@@ -482,11 +482,11 @@ extern void add_hash_page(unsigned context, unsigned long va, ...@@ -482,11 +482,11 @@ extern void add_hash_page(unsigned context, unsigned long va,
* must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit
* (if used). -- paulus * (if used). -- paulus
*/ */
#define SWP_TYPE(entry) ((entry).val & 0x3f) #define __swp_type(entry) ((entry).val & 0x3f)
#define SWP_OFFSET(entry) ((entry).val >> 6) #define __swp_offset(entry) ((entry).val >> 6)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { (type) | ((offset) << 6) }) #define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 6) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 })
#define swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 })
/* CONFIG_APUS */ /* CONFIG_APUS */
/* For virtual address to physical address conversion */ /* For virtual address to physical address conversion */
......
...@@ -359,11 +359,11 @@ extern void paging_init(void); ...@@ -359,11 +359,11 @@ extern void paging_init(void);
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) #define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define SWP_OFFSET(entry) ((entry).val >> 8) #define __swp_offset(entry) ((entry).val >> 8)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT })
#define swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT })
/* /*
* kern_addr_valid is intended to indicate whether an address is a valid * kern_addr_valid is intended to indicate whether an address is a valid
......
...@@ -485,12 +485,12 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) ...@@ -485,12 +485,12 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
return pte; return pte;
} }
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) #define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define SWP_OFFSET(entry) (((entry).val >> 12) & 0x7FFFF ) #define __swp_offset(entry) (((entry).val >> 12) & 0x7FFFF )
#define SWP_ENTRY(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -505,12 +505,12 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) ...@@ -505,12 +505,12 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
return pte; return pte;
} }
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) #define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define SWP_OFFSET(entry) ((entry).val >> 12) #define __swp_offset(entry) ((entry).val >> 12)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -294,11 +294,11 @@ extern void update_mmu_cache(struct vm_area_struct * vma, ...@@ -294,11 +294,11 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
* NOTE: We should set ZEROs at the position of _PAGE_PRESENT * NOTE: We should set ZEROs at the position of _PAGE_PRESENT
* and _PAGE_PROTONOE bits * and _PAGE_PROTONOE bits
*/ */
#define SWP_TYPE(x) ((x).val & 0xff) #define __swp_type(x) ((x).val & 0xff)
#define SWP_OFFSET(x) ((x).val >> 10) #define __swp_offset(x) ((x).val >> 10)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { (type) | ((offset) << 10) }) #define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 10) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
/* /*
* Routines for update of PTE * Routines for update of PTE
......
...@@ -376,11 +376,11 @@ BTFIXUPDEF_CALL(void, update_mmu_cache, struct vm_area_struct *, unsigned long, ...@@ -376,11 +376,11 @@ BTFIXUPDEF_CALL(void, update_mmu_cache, struct vm_area_struct *, unsigned long,
extern int invalid_segment; extern int invalid_segment;
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(x) (((x).val >> 2) & 0x7f) #define __swp_type(x) (((x).val >> 2) & 0x7f)
#define SWP_OFFSET(x) (((x).val >> 9) & 0x3ffff) #define __swp_offset(x) (((x).val >> 9) & 0x3ffff)
#define SWP_ENTRY(type,offset) ((swp_entry_t) { (((type) & 0x7f) << 2) | (((offset) & 0x3ffff) << 9) }) #define __swp_entry(type,offset) ((swp_entry_t) { (((type) & 0x7f) << 2) | (((offset) & 0x3ffff) << 9) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
struct ctx_list { struct ctx_list {
struct ctx_list *next; struct ctx_list *next;
......
...@@ -298,16 +298,16 @@ extern inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space) ...@@ -298,16 +298,16 @@ extern inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space)
} }
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL) #define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
#define SWP_OFFSET(entry) ((entry).val >> (PAGE_SHIFT + 8UL)) #define __swp_offset(entry) ((entry).val >> (PAGE_SHIFT + 8UL))
#define SWP_ENTRY(type, offset) \ #define __swp_entry(type, offset) \
( (swp_entry_t) \ ( (swp_entry_t) \
{ \ { \
(((long)(type) << PAGE_SHIFT) | \ (((long)(type) << PAGE_SHIFT) | \
((long)(offset) << (PAGE_SHIFT + 8UL))) \ ((long)(offset) << (PAGE_SHIFT + 8UL))) \
} ) } )
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
extern unsigned long prom_virt_to_phys(unsigned long, int *); extern unsigned long prom_virt_to_phys(unsigned long, int *);
......
...@@ -329,11 +329,11 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -329,11 +329,11 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define update_mmu_cache(vma,address,pte) do { } while (0) #define update_mmu_cache(vma,address,pte) do { } while (0)
/* Encode and de-code a swap entry */ /* Encode and de-code a swap entry */
#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) #define __swp_type(x) (((x).val >> 1) & 0x3f)
#define SWP_OFFSET(x) ((x).val >> 8) #define __swp_offset(x) ((x).val >> 8)
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x) ((pte_t) { (x).val }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -11,7 +11,16 @@ ...@@ -11,7 +11,16 @@
#define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_PRIO_SHIFT 0
#define MAX_SWAPFILES 32 /*
* MAX_SWAPFILES defines the maximum number of swaptypes: things which can
* be swapped to. The swap type and the offset into that swap type are
* encoded into pte's and into pgoff_t's in the swapcache. Using five bits
* for the type means that the maximum number of swapcache pages is 27 bits
* on 32-bit-pgoff_t architectures. And that assumes that the architecture packs
* the type/offset into the pte as 5/27 as well.
*/
#define MAX_SWAPFILES_SHIFT 5
#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT)
/* /*
* Magic header for a swap area. The first part of the union is * Magic header for a swap area. The first part of the union is
......
/*
 * swapcache pages are stored in the swapper_space radix tree.  We want to
 * get good packing density in that tree, so the index should be dense in
 * the low-order bits.
 *
 * We arrange the `type' and `offset' fields so that `type' is at the five
 * high-order bits of the swp_entry_t and `offset' is right-aligned in the
 * remaining bits.
 *
 * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
 */
#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
/*
 * 1UL, not 1: the shift count is (bits-per-long - MAX_SWAPFILES_SHIFT),
 * which exceeds the width of a plain int on 64-bit architectures and
 * would be undefined behaviour with an int-typed 1.
 */
#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
/*
 * Store a type+offset into a swp_entry_t in an arch-independent format:
 * `type' goes into the MAX_SWAPFILES_SHIFT high-order bits of ->val and
 * `offset' is masked into the remaining low-order bits.
 */
static inline swp_entry_t swp_entry(unsigned type, pgoff_t offset)
{
	swp_entry_t ret;

	/*
	 * Widen `type' to the width of ->val before shifting: the shift
	 * count is (bits-per-long - MAX_SWAPFILES_SHIFT), which exceeds
	 * the width of `unsigned int' on 64-bit architectures, so
	 * shifting the 32-bit `type' directly would be undefined.
	 */
	ret.val = type;
	ret.val <<= SWP_TYPE_SHIFT(ret);
	ret.val |= offset & SWP_OFFSET_MASK(ret);
	return ret;
}
/*
 * Extract the `type' field from a swp_entry_t.  The swp_entry_t is in
 * arch-independent format (type in the high-order bits).
 */
static inline unsigned swp_type(swp_entry_t entry)
{
	unsigned long high_bits = entry.val >> SWP_TYPE_SHIFT(entry);

	return high_bits & ((1 << MAX_SWAPFILES_SHIFT) - 1);
}
/*
 * Extract the `offset' field from a swp_entry_t.  The swp_entry_t is in
 * arch-independent format (offset right-aligned in the low-order bits).
 */
static inline pgoff_t swp_offset(swp_entry_t entry)
{
	pgoff_t off = entry.val;

	return off & SWP_OFFSET_MASK(entry);
}
/*
 * Convert the arch-dependent pte representation of a swp_entry_t into an
 * arch-independent swp_entry_t.
 */
static inline swp_entry_t pte_to_swp_entry(pte_t pte)
{
	swp_entry_t in_arch_format;

	in_arch_format = __pte_to_swp_entry(pte);
	/* repack from the arch layout into the dense canonical layout */
	return swp_entry(__swp_type(in_arch_format),
			 __swp_offset(in_arch_format));
}
/*
 * Convert the arch-independent representation of a swp_entry_t into the
 * arch-dependent pte representation.
 */
static inline pte_t swp_entry_to_pte(swp_entry_t entry)
{
	swp_entry_t in_arch_format;

	/* repack from the dense canonical layout into the arch layout */
	in_arch_format = __swp_entry(swp_type(entry), swp_offset(entry));
	return __swp_entry_to_pte(in_arch_format);
}
...@@ -124,6 +124,14 @@ typedef u64 sector_t; ...@@ -124,6 +124,14 @@ typedef u64 sector_t;
typedef unsigned long sector_t; typedef unsigned long sector_t;
#endif #endif
/*
* The type of an index into the pagecache. Use a #define so asm/types.h
* can override it.
*/
#ifndef pgoff_t
#define pgoff_t unsigned long
#endif
#endif /* __KERNEL_STRICT_NAMES */ #endif /* __KERNEL_STRICT_NAMES */
/* /*
......
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/io.h> #include <asm/io.h>
#include <linux/swapops.h>
unsigned char software_suspend_enabled = 0; unsigned char software_suspend_enabled = 0;
...@@ -327,7 +328,7 @@ static void mark_swapfiles(swp_entry_t prev, int mode) ...@@ -327,7 +328,7 @@ static void mark_swapfiles(swp_entry_t prev, int mode)
if (!cur) if (!cur)
panic("Out of memory in mark_swapfiles"); panic("Out of memory in mark_swapfiles");
/* XXX: this is dirty hack to get first page of swap file */ /* XXX: this is dirty hack to get first page of swap file */
entry = SWP_ENTRY(root_swap, 0); entry = swp_entry(root_swap, 0);
lock_page(virt_to_page((unsigned long)cur)); lock_page(virt_to_page((unsigned long)cur));
rw_swap_page_nolock(READ, entry, (char *) cur); rw_swap_page_nolock(READ, entry, (char *) cur);
...@@ -420,7 +421,7 @@ static int write_suspend_image(void) ...@@ -420,7 +421,7 @@ static int write_suspend_image(void)
if (!(entry = get_swap_page()).val) if (!(entry = get_swap_page()).val)
panic("\nNot enough swapspace when writing data" ); panic("\nNot enough swapspace when writing data" );
if(swapfile_used[SWP_TYPE(entry)] != SWAPFILE_SUSPEND) if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nPage %d: not enough swapspace on suspend device", i ); panic("\nPage %d: not enough swapspace on suspend device", i );
address = (pagedir_nosave+i)->address; address = (pagedir_nosave+i)->address;
...@@ -446,7 +447,7 @@ static int write_suspend_image(void) ...@@ -446,7 +447,7 @@ static int write_suspend_image(void)
return -ENOSPC; return -ENOSPC;
} }
if(swapfile_used[SWP_TYPE(entry)] != SWAPFILE_SUSPEND) if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for pagedir on suspend device" ); panic("\nNot enough swapspace for pagedir on suspend device" );
if (sizeof(swp_entry_t) != sizeof(long)) if (sizeof(swp_entry_t) != sizeof(long))
...@@ -466,7 +467,7 @@ static int write_suspend_image(void) ...@@ -466,7 +467,7 @@ static int write_suspend_image(void)
panic("union diskpage has bad size"); panic("union diskpage has bad size");
if (!(entry = get_swap_page()).val) if (!(entry = get_swap_page()).val)
panic( "\nNot enough swapspace when writing header" ); panic( "\nNot enough swapspace when writing header" );
if(swapfile_used[SWP_TYPE(entry)] != SWAPFILE_SUSPEND) if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for header on suspend device" ); panic("\nNot enough swapspace for header on suspend device" );
cur = (void *) buffer; cur = (void *) buffer;
...@@ -481,7 +482,7 @@ static int write_suspend_image(void) ...@@ -481,7 +482,7 @@ static int write_suspend_image(void)
PRINTK( ", signature" ); PRINTK( ", signature" );
#if 0 #if 0
if (SWP_TYPE(entry) != 0) if (swp_type(entry) != 0)
panic("Need just one swapfile"); panic("Need just one swapfile");
#endif #endif
mark_swapfiles(prev, MARK_SWAP_SUSPEND); mark_swapfiles(prev, MARK_SWAP_SUSPEND);
...@@ -1069,7 +1070,7 @@ static int resume_try_to_read(const char * specialfile, int noresume) ...@@ -1069,7 +1070,7 @@ static int resume_try_to_read(const char * specialfile, int noresume)
if (bdev_read_page(resume_device, pos, ptr)) { error = -EIO; goto resume_read_error; } if (bdev_read_page(resume_device, pos, ptr)) { error = -EIO; goto resume_read_error; }
#define PREPARENEXT \ #define PREPARENEXT \
{ next = cur->link.next; \ { next = cur->link.next; \
next.val = SWP_OFFSET(next) * PAGE_SIZE; \ next.val = swp_offset(next) * PAGE_SIZE; \
} }
error = -EIO; error = -EIO;
...@@ -1142,7 +1143,7 @@ static int resume_try_to_read(const char * specialfile, int noresume) ...@@ -1142,7 +1143,7 @@ static int resume_try_to_read(const char * specialfile, int noresume)
swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
if (!(i%100)) if (!(i%100))
PRINTK( "." ); PRINTK( "." );
next.val = SWP_OFFSET (swap_address) * PAGE_SIZE; next.val = swp_offset(swap_address) * PAGE_SIZE;
/* You do not need to check for overlaps... /* You do not need to check for overlaps...
... check_pagedir already did this work */ ... check_pagedir already did this work */
READTO(next.val, (char *)((pagedir_nosave+i)->address)); READTO(next.val, (char *)((pagedir_nosave+i)->address));
......
...@@ -50,6 +50,8 @@ ...@@ -50,6 +50,8 @@
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <linux/swapops.h>
unsigned long max_mapnr; unsigned long max_mapnr;
unsigned long num_physpages; unsigned long num_physpages;
void * high_memory; void * high_memory;
...@@ -1128,7 +1130,7 @@ void swapin_readahead(swp_entry_t entry) ...@@ -1128,7 +1130,7 @@ void swapin_readahead(swp_entry_t entry)
num = valid_swaphandles(entry, &offset); num = valid_swaphandles(entry, &offset);
for (i = 0; i < num; offset++, i++) { for (i = 0; i < num; offset++, i++) {
/* Ok, do the async read-ahead now */ /* Ok, do the async read-ahead now */
new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset)); new_page = read_swap_cache_async(swp_entry(swp_type(entry), offset));
if (!new_page) if (!new_page)
break; break;
page_cache_release(new_page); page_cache_release(new_page);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/buffer_head.h> /* for block_flushpage() */ #include <linux/buffer_head.h> /* for block_flushpage() */
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <linux/swapops.h>
spinlock_t swaplock = SPIN_LOCK_UNLOCKED; spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
unsigned int nr_swapfiles; unsigned int nr_swapfiles;
...@@ -121,7 +122,7 @@ swp_entry_t get_swap_page(void) ...@@ -121,7 +122,7 @@ swp_entry_t get_swap_page(void)
offset = scan_swap_map(p); offset = scan_swap_map(p);
swap_device_unlock(p); swap_device_unlock(p);
if (offset) { if (offset) {
entry = SWP_ENTRY(type,offset); entry = swp_entry(type,offset);
type = swap_info[type].next; type = swap_info[type].next;
if (type < 0 || if (type < 0 ||
p->prio != swap_info[type].prio) { p->prio != swap_info[type].prio) {
...@@ -154,13 +155,13 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry) ...@@ -154,13 +155,13 @@ static struct swap_info_struct * swap_info_get(swp_entry_t entry)
if (!entry.val) if (!entry.val)
goto out; goto out;
type = SWP_TYPE(entry); type = swp_type(entry);
if (type >= nr_swapfiles) if (type >= nr_swapfiles)
goto bad_nofile; goto bad_nofile;
p = & swap_info[type]; p = & swap_info[type];
if (!(p->flags & SWP_USED)) if (!(p->flags & SWP_USED))
goto bad_device; goto bad_device;
offset = SWP_OFFSET(entry); offset = swp_offset(entry);
if (offset >= p->max) if (offset >= p->max)
goto bad_offset; goto bad_offset;
if (!p->swap_map[offset]) if (!p->swap_map[offset])
...@@ -220,7 +221,7 @@ void swap_free(swp_entry_t entry) ...@@ -220,7 +221,7 @@ void swap_free(swp_entry_t entry)
p = swap_info_get(entry); p = swap_info_get(entry);
if (p) { if (p) {
swap_entry_free(p, SWP_OFFSET(entry)); swap_entry_free(p, swp_offset(entry));
swap_info_put(p); swap_info_put(p);
} }
} }
...@@ -239,7 +240,7 @@ static int exclusive_swap_page(struct page *page) ...@@ -239,7 +240,7 @@ static int exclusive_swap_page(struct page *page)
p = swap_info_get(entry); p = swap_info_get(entry);
if (p) { if (p) {
/* Is the only swap cache user the cache itself? */ /* Is the only swap cache user the cache itself? */
if (p->swap_map[SWP_OFFSET(entry)] == 1) { if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the pagecache lock held.. */ /* Recheck the page count with the pagecache lock held.. */
read_lock(&swapper_space.page_lock); read_lock(&swapper_space.page_lock);
if (page_count(page) - !!PagePrivate(page) == 2) if (page_count(page) - !!PagePrivate(page) == 2)
...@@ -307,7 +308,7 @@ int remove_exclusive_swap_page(struct page *page) ...@@ -307,7 +308,7 @@ int remove_exclusive_swap_page(struct page *page)
/* Is the only swap cache user the cache itself? */ /* Is the only swap cache user the cache itself? */
retval = 0; retval = 0;
if (p->swap_map[SWP_OFFSET(entry)] == 1) { if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the pagecache lock held.. */ /* Recheck the page count with the pagecache lock held.. */
write_lock(&swapper_space.page_lock); write_lock(&swapper_space.page_lock);
if (page_count(page) - !!PagePrivate(page) == 2) { if (page_count(page) - !!PagePrivate(page) == 2) {
...@@ -344,7 +345,7 @@ void free_swap_and_cache(swp_entry_t entry) ...@@ -344,7 +345,7 @@ void free_swap_and_cache(swp_entry_t entry)
p = swap_info_get(entry); p = swap_info_get(entry);
if (p) { if (p) {
if (swap_entry_free(p, SWP_OFFSET(entry)) == 1) if (swap_entry_free(p, swp_offset(entry)) == 1)
page = find_trylock_page(&swapper_space, entry.val); page = find_trylock_page(&swapper_space, entry.val);
swap_info_put(p); swap_info_put(p);
} }
...@@ -568,7 +569,7 @@ static int try_to_unuse(unsigned int type) ...@@ -568,7 +569,7 @@ static int try_to_unuse(unsigned int type)
* page and read the swap into it. * page and read the swap into it.
*/ */
swap_map = &si->swap_map[i]; swap_map = &si->swap_map[i];
entry = SWP_ENTRY(type, i); entry = swp_entry(type, i);
page = read_swap_cache_async(entry); page = read_swap_cache_async(entry);
if (!page) { if (!page) {
/* /*
...@@ -954,7 +955,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -954,7 +955,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
} }
lock_page(virt_to_page(swap_header)); lock_page(virt_to_page(swap_header));
rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header); rw_swap_page_nolock(READ, swp_entry(type,0), (char *) swap_header);
if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
swap_header_version = 1; swap_header_version = 1;
...@@ -1007,7 +1008,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) ...@@ -1007,7 +1008,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
} }
p->lowest_bit = 1; p->lowest_bit = 1;
maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)) - 1; maxpages = swp_offset(swp_entry(0,~0UL)) - 1;
if (maxpages > swap_header->info.last_page) if (maxpages > swap_header->info.last_page)
maxpages = swap_header->info.last_page; maxpages = swap_header->info.last_page;
p->highest_bit = maxpages - 1; p->highest_bit = maxpages - 1;
...@@ -1141,11 +1142,11 @@ int swap_duplicate(swp_entry_t entry) ...@@ -1141,11 +1142,11 @@ int swap_duplicate(swp_entry_t entry)
unsigned long offset, type; unsigned long offset, type;
int result = 0; int result = 0;
type = SWP_TYPE(entry); type = swp_type(entry);
if (type >= nr_swapfiles) if (type >= nr_swapfiles)
goto bad_file; goto bad_file;
p = type + swap_info; p = type + swap_info;
offset = SWP_OFFSET(entry); offset = swp_offset(entry);
swap_device_lock(p); swap_device_lock(p);
if (offset < p->max && p->swap_map[offset]) { if (offset < p->max && p->swap_map[offset]) {
...@@ -1182,11 +1183,11 @@ int swap_count(struct page *page) ...@@ -1182,11 +1183,11 @@ int swap_count(struct page *page)
entry.val = page->index; entry.val = page->index;
if (!entry.val) if (!entry.val)
goto bad_entry; goto bad_entry;
type = SWP_TYPE(entry); type = swp_type(entry);
if (type >= nr_swapfiles) if (type >= nr_swapfiles)
goto bad_file; goto bad_file;
p = type + swap_info; p = type + swap_info;
offset = SWP_OFFSET(entry); offset = swp_offset(entry);
if (offset >= p->max) if (offset >= p->max)
goto bad_offset; goto bad_offset;
if (!p->swap_map[offset]) if (!p->swap_map[offset])
...@@ -1218,14 +1219,14 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset, ...@@ -1218,14 +1219,14 @@ void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
unsigned long type; unsigned long type;
struct swap_info_struct *p; struct swap_info_struct *p;
type = SWP_TYPE(entry); type = swp_type(entry);
if (type >= nr_swapfiles) { if (type >= nr_swapfiles) {
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val); printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
return; return;
} }
p = &swap_info[type]; p = &swap_info[type];
*offset = SWP_OFFSET(entry); *offset = swp_offset(entry);
if (*offset >= p->max && *offset != 0) { if (*offset >= p->max && *offset != 0) {
printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val); printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
return; return;
...@@ -1250,11 +1251,11 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) ...@@ -1250,11 +1251,11 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{ {
int ret = 0, i = 1 << page_cluster; int ret = 0, i = 1 << page_cluster;
unsigned long toff; unsigned long toff;
struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info; struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
if (!page_cluster) /* no readahead */ if (!page_cluster) /* no readahead */
return 0; return 0;
toff = (SWP_OFFSET(entry) >> page_cluster) << page_cluster; toff = (swp_offset(entry) >> page_cluster) << page_cluster;
if (!toff) /* first page is swap header */ if (!toff) /* first page is swap header */
toff++, i--; toff++, i--;
*offset = toff; *offset = toff;
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <linux/swapops.h>
/* /*
* The "priority" of VM scanning is how much of the queues we * The "priority" of VM scanning is how much of the queues we
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment