Commit 2fe9c14c authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] rmap 17: real prio_tree

From: Hugh Dickins <hugh@veritas.com>

Rajesh Venkatasubramanian's implementation of a radix priority search tree of
vmas, to handle object-based reverse mapping corner cases well.

Amongst the objections to object-based rmap were test cases by akpm and by
mingo, in which large numbers of vmas mapping disjoint or overlapping parts of
a file showed strikingly poor performance of the i_mmap lists.  Perhaps those
tests are irrelevant in the real world?  We cannot be too sure: the prio_tree
is well-suited to solving precisely that problem, so unless it turns out to
bring too much overhead, let's include it.

Why is this prio_tree.c placed in mm rather than lib?  See GET_INDEX: this
implementation is geared throughout to use with vmas, though the first half of
the file appears more general than the second half.

Each node of the prio_tree is itself (contained within) a vma: might save
memory by allocating distinct nodes from which to hang vmas, but wouldn't save
much, and would complicate the usage with preallocations.  Off each node of
the prio_tree itself hangs a list of like vmas, if any.

The connection from node to list is a little awkward, but probably the best
compromise: it would be more straightforward to list likes directly from the
tree node, but that would use more memory per vma, for the list_head and to
identify that head.  Instead, node's shared.vm_set.head points to next vma
(whose shared.vm_set.head points back to node vma), and that next contains the
list_head from which the rest hang - reusing fields already used in the
prio_tree node itself.

Currently lacks prefetch: Rajesh hopes to add some soon.
parent fc96c90f
...@@ -73,7 +73,15 @@ struct vm_area_struct { ...@@ -73,7 +73,15 @@ struct vm_area_struct {
* For areas with an address space and backing store, * For areas with an address space and backing store,
* one of the address_space->i_mmap{,shared} trees. * one of the address_space->i_mmap{,shared} trees.
*/ */
struct list_head shared; union {
struct {
struct list_head list;
void *parent; /* aligns with prio_tree_node parent */
struct vm_area_struct *head;
} vm_set;
struct prio_tree_node prio_tree_node;
} shared;
/* Function pointers to deal with this struct. */ /* Function pointers to deal with this struct. */
struct vm_operations_struct * vm_ops; struct vm_operations_struct * vm_ops;
...@@ -589,27 +597,16 @@ extern void si_meminfo_node(struct sysinfo *val, int nid); ...@@ -589,27 +597,16 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
static inline void vma_prio_tree_init(struct vm_area_struct *vma) static inline void vma_prio_tree_init(struct vm_area_struct *vma)
{ {
INIT_LIST_HEAD(&vma->shared); vma->shared.vm_set.list.next = NULL;
} vma->shared.vm_set.list.prev = NULL;
vma->shared.vm_set.parent = NULL;
static inline void vma_prio_tree_add(struct vm_area_struct *vma, vma->shared.vm_set.head = NULL;
struct vm_area_struct *old)
{
list_add(&vma->shared, &old->shared);
}
static inline void vma_prio_tree_insert(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
list_add_tail(&vma->shared, &root->list);
}
static inline void vma_prio_tree_remove(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
list_del_init(&vma->shared);
} }
/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
struct vm_area_struct *vma_prio_tree_next( struct vm_area_struct *vma_prio_tree_next(
struct vm_area_struct *, struct prio_tree_root *, struct vm_area_struct *, struct prio_tree_root *,
struct prio_tree_iter *, pgoff_t begin, pgoff_t end); struct prio_tree_iter *, pgoff_t begin, pgoff_t end);
......
#ifndef _LINUX_PRIO_TREE_H #ifndef _LINUX_PRIO_TREE_H
#define _LINUX_PRIO_TREE_H #define _LINUX_PRIO_TREE_H
/*
* Dummy version of include/linux/prio_tree.h, just for this patch: struct prio_tree_node {
* no radix priority search tree whatsoever, just implement interfaces struct prio_tree_node *left;
* using the old lists. struct prio_tree_node *right;
*/ struct prio_tree_node *parent;
};
struct prio_tree_root { struct prio_tree_root {
struct list_head list; struct prio_tree_node *prio_tree_node;
unsigned int index_bits;
}; };
struct prio_tree_iter { struct prio_tree_iter {
int not_used_yet; struct prio_tree_node *cur;
unsigned long mask;
unsigned long value;
int size_level;
}; };
#define INIT_PRIO_TREE_ROOT(ptr) \ #define INIT_PRIO_TREE_ROOT(ptr) \
do { \ do { \
INIT_LIST_HEAD(&(ptr)->list); \ (ptr)->prio_tree_node = NULL; \
} while (0) \ (ptr)->index_bits = 1; \
} while (0)
#define INIT_PRIO_TREE_NODE(ptr) \
do { \
(ptr)->left = (ptr)->right = (ptr)->parent = (ptr); \
} while (0)
#define INIT_PRIO_TREE_ITER(ptr) \
do { \
(ptr)->cur = NULL; \
(ptr)->mask = 0UL; \
(ptr)->value = 0UL; \
(ptr)->size_level = 0; \
} while (0)
#define prio_tree_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
static inline int prio_tree_empty(const struct prio_tree_root *root) static inline int prio_tree_empty(const struct prio_tree_root *root)
{ {
return list_empty(&root->list); return root->prio_tree_node == NULL;
}
static inline int prio_tree_root(const struct prio_tree_node *node)
{
return node->parent == node;
}
static inline int prio_tree_left_empty(const struct prio_tree_node *node)
{
return node->left == node;
}
static inline int prio_tree_right_empty(const struct prio_tree_node *node)
{
return node->right == node;
} }
#endif /* _LINUX_PRIO_TREE_H */ #endif /* _LINUX_PRIO_TREE_H */
...@@ -84,6 +84,7 @@ extern void signals_init(void); ...@@ -84,6 +84,7 @@ extern void signals_init(void);
extern void buffer_init(void); extern void buffer_init(void);
extern void pidhash_init(void); extern void pidhash_init(void);
extern void pidmap_init(void); extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void radix_tree_init(void); extern void radix_tree_init(void);
extern void free_initmem(void); extern void free_initmem(void);
extern void populate_rootfs(void); extern void populate_rootfs(void);
...@@ -459,6 +460,7 @@ asmlinkage void __init start_kernel(void) ...@@ -459,6 +460,7 @@ asmlinkage void __init start_kernel(void)
calibrate_delay(); calibrate_delay();
pidmap_init(); pidmap_init();
pgtable_cache_init(); pgtable_cache_init();
prio_tree_init();
#ifdef CONFIG_X86 #ifdef CONFIG_X86
if (efi_enabled) if (efi_enabled)
efi_enter_virtual_mode(); efi_enter_virtual_mode();
......
...@@ -8,8 +8,9 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ ...@@ -8,8 +8,9 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
shmem.o vmalloc.o shmem.o vmalloc.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o readahead.o \ page_alloc.o page-writeback.o pdflush.o prio_tree.o \
slab.o swap.o truncate.o vmscan.o $(mmu-y) readahead.o slab.o swap.o truncate.o vmscan.o \
$(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o
......
...@@ -322,31 +322,6 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) ...@@ -322,31 +322,6 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
validate_mm(mm); validate_mm(mm);
} }
/*
* Dummy version of vma_prio_tree_next, just for this patch:
* no radix priority search tree whatsoever, just implement interface
* using the old lists: return the next vma overlapping [begin,end].
*/
struct vm_area_struct *vma_prio_tree_next(
struct vm_area_struct *vma, struct prio_tree_root *root,
struct prio_tree_iter *iter, pgoff_t begin, pgoff_t end)
{
struct list_head *next;
pgoff_t vba, vea;
next = vma? vma->shared.next: root->list.next;
while (next != &root->list) {
vma = list_entry(next, struct vm_area_struct, shared);
vba = vma->vm_pgoff;
vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
/* Return vma if it overlaps [begin,end] */
if (vba <= end && vea >= begin)
return vma;
next = next->next;
}
return NULL;
}
/* /*
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that is * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that is
* already present in an i_mmap{_shared} tree without adjusting the tree. * already present in an i_mmap{_shared} tree without adjusting the tree.
......
/*
* mm/prio_tree.c - priority search tree for mapping->i_mmap{,_shared}
*
* Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
*
* This file is released under the GPL v2.
*
* Based on the radix priority search tree proposed by Edward M. McCreight
* SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
*
* 02Feb2004 Initial version
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/prio_tree.h>
/*
* A clever mix of heap and radix trees forms a radix priority search tree (PST)
* which is useful for storing intervals, e.g, we can consider a vma as a closed
* interval of file pages [offset_begin, offset_end], and store all vmas that
* map a file in a PST. Then, using the PST, we can answer a stabbing query,
* i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
* given input interval X (a set of consecutive file pages), in "O(log n + m)"
* time where 'log n' is the height of the PST, and 'm' is the number of stored
* intervals (vmas) that overlap (map) with the input interval X (the set of
* consecutive file pages).
*
* In our implementation, we store closed intervals of the form [radix_index,
* heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
* is designed for storing intervals with unique radix indices, i.e., each
* interval have different radix_index. However, this limitation can be easily
* overcome by using the size, i.e., heap_index - radix_index, as part of the
* index, so we index the tree using [(radix_index,size), heap_index].
*
* When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
* machine, the maximum height of a PST can be 64. We can use a balanced version
* of the priority search tree to optimize the tree height, but the balanced
* tree proposed by McCreight is too complex and memory-hungry for our purpose.
*/
/*
* The following macros are used for implementing prio_tree for i_mmap{_shared}
*/
#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
/* avoid overflow */
#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
#define GET_INDEX_VMA(vma, radix, heap) \
do { \
radix = RADIX_INDEX(vma); \
heap = HEAP_INDEX(vma); \
} while (0)
#define GET_INDEX(node, radix, heap) \
do { \
struct vm_area_struct *__tmp = \
prio_tree_entry(node, struct vm_area_struct, shared.prio_tree_node);\
GET_INDEX_VMA(__tmp, radix, heap); \
} while (0)
static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
void __init prio_tree_init(void)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
}
/*
* Maximum heap_index that can be stored in a PST with index_bits bits
*/
static inline unsigned long prio_tree_maxindex(unsigned int bits)
{
return index_bits_to_maxindex[bits - 1];
}
/*
* Extend a priority search tree so that it can store a node with heap_index
* max_heap_index. In the worst case, this algorithm takes O((log n)^2).
* However, this function is used rarely and the common case performance is
* not bad.
*/
static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
struct prio_tree_node *node, unsigned long max_heap_index)
{
static void prio_tree_remove(struct prio_tree_root *,
struct prio_tree_node *);
struct prio_tree_node *first = NULL, *prev, *last = NULL;
if (max_heap_index > prio_tree_maxindex(root->index_bits))
root->index_bits++;
while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
root->index_bits++;
if (prio_tree_empty(root))
continue;
if (first == NULL) {
first = root->prio_tree_node;
prio_tree_remove(root, root->prio_tree_node);
INIT_PRIO_TREE_NODE(first);
last = first;
} else {
prev = last;
last = root->prio_tree_node;
prio_tree_remove(root, root->prio_tree_node);
INIT_PRIO_TREE_NODE(last);
prev->left = last;
last->parent = prev;
}
}
INIT_PRIO_TREE_NODE(node);
if (first) {
node->left = first;
first->parent = node;
} else
last = node;
if (!prio_tree_empty(root)) {
last->left = root->prio_tree_node;
last->left->parent = last;
}
root->prio_tree_node = node;
return node;
}
/*
* Replace a prio_tree_node with a new node and return the old node
*/
static struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
struct prio_tree_node *old, struct prio_tree_node *node)
{
INIT_PRIO_TREE_NODE(node);
if (prio_tree_root(old)) {
BUG_ON(root->prio_tree_node != old);
/*
* We can reduce root->index_bits here. However, it is complex
* and does not help much to improve performance (IMO).
*/
node->parent = node;
root->prio_tree_node = node;
} else {
node->parent = old->parent;
if (old->parent->left == old)
old->parent->left = node;
else
old->parent->right = node;
}
if (!prio_tree_left_empty(old)) {
node->left = old->left;
old->left->parent = node;
}
if (!prio_tree_right_empty(old)) {
node->right = old->right;
old->right->parent = node;
}
return old;
}
/*
* Insert a prio_tree_node @node into a radix priority search tree @root. The
* algorithm typically takes O(log n) time where 'log n' is the number of bits
* required to represent the maximum heap_index. In the worst case, the algo
* can take O((log n)^2) - check prio_tree_expand.
*
* If a prior node with same radix_index and heap_index is already found in
* the tree, then returns the address of the prior node. Otherwise, inserts
* @node into the tree and returns @node.
*/
static struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
struct prio_tree_node *node)
{
struct prio_tree_node *cur, *res = node;
unsigned long radix_index, heap_index;
unsigned long r_index, h_index, index, mask;
int size_flag = 0;
GET_INDEX(node, radix_index, heap_index);
if (prio_tree_empty(root) ||
heap_index > prio_tree_maxindex(root->index_bits))
return prio_tree_expand(root, node, heap_index);
cur = root->prio_tree_node;
mask = 1UL << (root->index_bits - 1);
while (mask) {
GET_INDEX(cur, r_index, h_index);
if (r_index == radix_index && h_index == heap_index)
return cur;
if (h_index < heap_index ||
(h_index == heap_index && r_index > radix_index)) {
struct prio_tree_node *tmp = node;
node = prio_tree_replace(root, cur, node);
cur = tmp;
/* swap indices */
index = r_index;
r_index = radix_index;
radix_index = index;
index = h_index;
h_index = heap_index;
heap_index = index;
}
if (size_flag)
index = heap_index - radix_index;
else
index = radix_index;
if (index & mask) {
if (prio_tree_right_empty(cur)) {
INIT_PRIO_TREE_NODE(node);
cur->right = node;
node->parent = cur;
return res;
} else
cur = cur->right;
} else {
if (prio_tree_left_empty(cur)) {
INIT_PRIO_TREE_NODE(node);
cur->left = node;
node->parent = cur;
return res;
} else
cur = cur->left;
}
mask >>= 1;
if (!mask) {
mask = 1UL << (root->index_bits - 1);
size_flag = 1;
}
}
/* Should not reach here */
BUG();
return NULL;
}
/*
* Remove a prio_tree_node @node from a radix priority search tree @root. The
* algorithm takes O(log n) time where 'log n' is the number of bits required
* to represent the maximum heap_index.
*/
static void prio_tree_remove(struct prio_tree_root *root,
struct prio_tree_node *node)
{
struct prio_tree_node *cur;
unsigned long r_index, h_index_right, h_index_left;
cur = node;
while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
if (!prio_tree_left_empty(cur))
GET_INDEX(cur->left, r_index, h_index_left);
else {
cur = cur->right;
continue;
}
if (!prio_tree_right_empty(cur))
GET_INDEX(cur->right, r_index, h_index_right);
else {
cur = cur->left;
continue;
}
/* both h_index_left and h_index_right cannot be 0 */
if (h_index_left >= h_index_right)
cur = cur->left;
else
cur = cur->right;
}
if (prio_tree_root(cur)) {
BUG_ON(root->prio_tree_node != cur);
INIT_PRIO_TREE_ROOT(root);
return;
}
if (cur->parent->right == cur)
cur->parent->right = cur->parent;
else
cur->parent->left = cur->parent;
while (cur != node)
cur = prio_tree_replace(root, cur->parent, cur);
}
/*
* Following functions help to enumerate all prio_tree_nodes in the tree that
* overlap with the input interval X [radix_index, heap_index]. The enumeration
* takes O(log n + m) time where 'log n' is the height of the tree (which is
* proportional to # of bits required to represent the maximum heap_index) and
* 'm' is the number of prio_tree_nodes that overlap the interval X.
*/
static struct prio_tree_node *prio_tree_left(
struct prio_tree_root *root, struct prio_tree_iter *iter,
unsigned long radix_index, unsigned long heap_index,
unsigned long *r_index, unsigned long *h_index)
{
if (prio_tree_left_empty(iter->cur))
return NULL;
GET_INDEX(iter->cur->left, *r_index, *h_index);
if (radix_index <= *h_index) {
iter->cur = iter->cur->left;
iter->mask >>= 1;
if (iter->mask) {
if (iter->size_level)
iter->size_level++;
} else {
if (iter->size_level) {
BUG_ON(!prio_tree_left_empty(iter->cur));
BUG_ON(!prio_tree_right_empty(iter->cur));
iter->size_level++;
iter->mask = ULONG_MAX;
} else {
iter->size_level = 1;
iter->mask = 1UL << (root->index_bits - 1);
}
}
return iter->cur;
}
return NULL;
}
static struct prio_tree_node *prio_tree_right(
struct prio_tree_root *root, struct prio_tree_iter *iter,
unsigned long radix_index, unsigned long heap_index,
unsigned long *r_index, unsigned long *h_index)
{
unsigned long value;
if (prio_tree_right_empty(iter->cur))
return NULL;
if (iter->size_level)
value = iter->value;
else
value = iter->value | iter->mask;
if (heap_index < value)
return NULL;
GET_INDEX(iter->cur->right, *r_index, *h_index);
if (radix_index <= *h_index) {
iter->cur = iter->cur->right;
iter->mask >>= 1;
iter->value = value;
if (iter->mask) {
if (iter->size_level)
iter->size_level++;
} else {
if (iter->size_level) {
BUG_ON(!prio_tree_left_empty(iter->cur));
BUG_ON(!prio_tree_right_empty(iter->cur));
iter->size_level++;
iter->mask = ULONG_MAX;
} else {
iter->size_level = 1;
iter->mask = 1UL << (root->index_bits - 1);
}
}
return iter->cur;
}
return NULL;
}
static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
{
iter->cur = iter->cur->parent;
if (iter->mask == ULONG_MAX)
iter->mask = 1UL;
else if (iter->size_level == 1)
iter->mask = 1UL;
else
iter->mask <<= 1;
if (iter->size_level)
iter->size_level--;
if (!iter->size_level && (iter->value & iter->mask))
iter->value ^= iter->mask;
return iter->cur;
}
static inline int overlap(unsigned long radix_index, unsigned long heap_index,
unsigned long r_index, unsigned long h_index)
{
return heap_index >= r_index && radix_index <= h_index;
}
/*
* prio_tree_first:
*
* Get the first prio_tree_node that overlaps with the interval [radix_index,
* heap_index]. Note that always radix_index <= heap_index. We do a pre-order
* traversal of the tree.
*/
static struct prio_tree_node *prio_tree_first(struct prio_tree_root *root,
struct prio_tree_iter *iter, unsigned long radix_index,
unsigned long heap_index)
{
unsigned long r_index, h_index;
INIT_PRIO_TREE_ITER(iter);
if (prio_tree_empty(root))
return NULL;
GET_INDEX(root->prio_tree_node, r_index, h_index);
if (radix_index > h_index)
return NULL;
iter->mask = 1UL << (root->index_bits - 1);
iter->cur = root->prio_tree_node;
while (1) {
if (overlap(radix_index, heap_index, r_index, h_index))
return iter->cur;
if (prio_tree_left(root, iter, radix_index, heap_index,
&r_index, &h_index))
continue;
if (prio_tree_right(root, iter, radix_index, heap_index,
&r_index, &h_index))
continue;
break;
}
return NULL;
}
/*
* prio_tree_next:
*
* Get the next prio_tree_node that overlaps with the input interval in iter
*/
static struct prio_tree_node *prio_tree_next(struct prio_tree_root *root,
struct prio_tree_iter *iter, unsigned long radix_index,
unsigned long heap_index)
{
unsigned long r_index, h_index;
repeat:
while (prio_tree_left(root, iter, radix_index,
heap_index, &r_index, &h_index)) {
if (overlap(radix_index, heap_index, r_index, h_index))
return iter->cur;
}
while (!prio_tree_right(root, iter, radix_index,
heap_index, &r_index, &h_index)) {
while (!prio_tree_root(iter->cur) &&
iter->cur->parent->right == iter->cur)
prio_tree_parent(iter);
if (prio_tree_root(iter->cur))
return NULL;
prio_tree_parent(iter);
}
if (overlap(radix_index, heap_index, r_index, h_index))
return iter->cur;
goto repeat;
}
/*
* Radix priority search tree for address_space->i_mmap_{_shared}
*
* For each vma that map a unique set of file pages i.e., unique [radix_index,
* heap_index] value, we have a corresponing priority search tree node. If
* multiple vmas have identical [radix_index, heap_index] value, then one of
* them is used as a tree node and others are stored in a vm_set list. The tree
* node points to the first vma (head) of the list using vm_set.head.
*
* prio_tree_root
* |
* A vm_set.head
* / \ /
* L R -> H-I-J-K-M-N-O-P-Q-S
* ^ ^ <-- vm_set.list -->
* tree nodes
*
* We need some way to identify whether a vma is a tree node, head of a vm_set
* list, or just a member of a vm_set list. We cannot use vm_flags to store
* such information. The reason is, in the above figure, it is possible that
* vm_flags' of R and H are covered by the different mmap_sems. When R is
* removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
* H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
* That's why some trick involving shared.vm_set.parent is used for identifying
* tree nodes and list head nodes.
*
* vma radix priority search tree node rules:
*
* vma->shared.vm_set.parent != NULL ==> a tree node
* vma->shared.vm_set.head != NULL ==> list of others mapping same range
* vma->shared.vm_set.head == NULL ==> no others map the same range
*
* vma->shared.vm_set.parent == NULL
* vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
* vma->shared.vm_set.head == NULL ==> a list node
*/
/*
* Add a new vma known to map the same set of pages as the old vma:
* useful for fork's dup_mmap as well as vma_prio_tree_insert below.
*/
void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
{
/* Leave these BUG_ONs till prio_tree patch stabilizes */
BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
if (!old->shared.vm_set.parent)
list_add(&vma->shared.vm_set.list,
&old->shared.vm_set.list);
else if (old->shared.vm_set.head)
list_add_tail(&vma->shared.vm_set.list,
&old->shared.vm_set.head->shared.vm_set.list);
else {
INIT_LIST_HEAD(&vma->shared.vm_set.list);
vma->shared.vm_set.head = old;
old->shared.vm_set.head = vma;
}
}
void vma_prio_tree_insert(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
struct prio_tree_node *ptr;
struct vm_area_struct *old;
ptr = prio_tree_insert(root, &vma->shared.prio_tree_node);
if (ptr != &vma->shared.prio_tree_node) {
old = prio_tree_entry(ptr, struct vm_area_struct,
shared.prio_tree_node);
vma_prio_tree_add(vma, old);
}
}
void vma_prio_tree_remove(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
struct vm_area_struct *node, *head, *new_head;
if (!vma->shared.vm_set.head) {
if (!vma->shared.vm_set.parent)
list_del_init(&vma->shared.vm_set.list);
else
prio_tree_remove(root, &vma->shared.prio_tree_node);
} else {
/* Leave this BUG_ON till prio_tree patch stabilizes */
BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
if (vma->shared.vm_set.parent) {
head = vma->shared.vm_set.head;
if (!list_empty(&head->shared.vm_set.list)) {
new_head = list_entry(
head->shared.vm_set.list.next,
struct vm_area_struct,
shared.vm_set.list);
list_del_init(&head->shared.vm_set.list);
} else
new_head = NULL;
prio_tree_replace(root, &vma->shared.prio_tree_node,
&head->shared.prio_tree_node);
head->shared.vm_set.head = new_head;
if (new_head)
new_head->shared.vm_set.head = head;
} else {
node = vma->shared.vm_set.head;
if (!list_empty(&vma->shared.vm_set.list)) {
new_head = list_entry(
vma->shared.vm_set.list.next,
struct vm_area_struct,
shared.vm_set.list);
list_del_init(&vma->shared.vm_set.list);
node->shared.vm_set.head = new_head;
new_head->shared.vm_set.head = node;
} else
node->shared.vm_set.head = NULL;
}
}
}
/*
* Helper function to enumerate vmas that map a given file page or a set of
* contiguous file pages. The function returns vmas that at least map a single
* page in the given range of contiguous file pages.
*/
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
struct prio_tree_root *root, struct prio_tree_iter *iter,
pgoff_t begin, pgoff_t end)
{
struct prio_tree_node *ptr;
struct vm_area_struct *next;
if (!vma) {
/*
* First call is with NULL vma
*/
ptr = prio_tree_first(root, iter, begin, end);
if (ptr)
return prio_tree_entry(ptr, struct vm_area_struct,
shared.prio_tree_node);
else
return NULL;
}
if (vma->shared.vm_set.parent) {
if (vma->shared.vm_set.head)
return vma->shared.vm_set.head;
} else {
next = list_entry(vma->shared.vm_set.list.next,
struct vm_area_struct, shared.vm_set.list);
if (!next->shared.vm_set.head)
return next;
}
ptr = prio_tree_next(root, iter, begin, end);
if (ptr)
return prio_tree_entry(ptr, struct vm_area_struct,
shared.prio_tree_node);
else
return NULL;
}
EXPORT_SYMBOL(vma_prio_tree_next);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment