Commit 12cfbad9 authored by Filipe David Borba Manana's avatar Filipe David Borba Manana Committed by Chris Mason

Btrfs: more efficient extent state insertions

Currently we do 2 traversals of an inode's extent_io_tree
before inserting an extent state structure: 1 to see if a
matching extent state already exists and 1 to do the insertion
if the fist traversal didn't found such extent state.

This change just combines those tree traversals into a single one.
While running sysbench tests (random writes) I captured the number
of elements in extent_io_tree trees for a while (into a procfs file
backed by a seq_list from seq_file module) and got this histogram:

Count: 9310
Range: 51.000 - 21386.000; Mean: 11785.243; Median: 18743.500; Stddev: 8923.688
Percentiles:  90th: 20985.000; 95th: 21155.000; 99th: 21369.000
  51.000 -   93.933:   693 ########
  93.933 -  172.314:   938 ##########
 172.314 -  315.408:   856 #########
 315.408 -  576.646:    95 #
 576.646 - 6415.830:   888 ##########
6415.830 - 11713.809:  1024 ###########
11713.809 - 21386.000:  4816 #####################################################

So traversing such trees can take some significant time that can
easily be avoided.

Ran the following sysbench tests, 5 times each, for sequential and
random writes, and got the following results:

  sysbench --test=fileio --file-num=1 --file-total-size=2G \
    --file-test-mode=seqwr --num-threads=16 --file-block-size=65536 \
    --max-requests=0 --max-time=60 --file-io-mode=sync

  sysbench --test=fileio --file-num=1 --file-total-size=2G \
    --file-test-mode=rndwr --num-threads=16 --file-block-size=65536 \
    --max-requests=0 --max-time=60 --file-io-mode=sync

Before this change:

sequential writes: 69.28Mb/sec (average of 5 runs)
random writes:     4.14Mb/sec  (average of 5 runs)

After this change:

sequential writes: 69.91Mb/sec (average of 5 runs)
random writes:     5.69Mb/sec  (average of 5 runs)
Signed-off-by: default avatarFilipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent c42ac0bc
...@@ -224,12 +224,20 @@ void free_extent_state(struct extent_state *state) ...@@ -224,12 +224,20 @@ void free_extent_state(struct extent_state *state)
} }
static struct rb_node *tree_insert(struct rb_root *root, u64 offset, static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
struct rb_node *node) struct rb_node *node,
struct rb_node ***p_in,
struct rb_node **parent_in)
{ {
struct rb_node **p = &root->rb_node; struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct tree_entry *entry; struct tree_entry *entry;
if (p_in && parent_in) {
p = *p_in;
parent = *parent_in;
goto do_insert;
}
while (*p) { while (*p) {
parent = *p; parent = *p;
entry = rb_entry(parent, struct tree_entry, rb_node); entry = rb_entry(parent, struct tree_entry, rb_node);
...@@ -242,6 +250,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, ...@@ -242,6 +250,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent; return parent;
} }
do_insert:
rb_link_node(node, parent, p); rb_link_node(node, parent, p);
rb_insert_color(node, root); rb_insert_color(node, root);
return NULL; return NULL;
...@@ -249,28 +258,35 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, ...@@ -249,28 +258,35 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
struct rb_node **prev_ret, struct rb_node **prev_ret,
struct rb_node **next_ret) struct rb_node **next_ret,
struct rb_node ***p_ret,
struct rb_node **parent_ret)
{ {
struct rb_root *root = &tree->state; struct rb_root *root = &tree->state;
struct rb_node *n = root->rb_node; struct rb_node **n = &root->rb_node;
struct rb_node *prev = NULL; struct rb_node *prev = NULL;
struct rb_node *orig_prev = NULL; struct rb_node *orig_prev = NULL;
struct tree_entry *entry; struct tree_entry *entry;
struct tree_entry *prev_entry = NULL; struct tree_entry *prev_entry = NULL;
while (n) { while (*n) {
entry = rb_entry(n, struct tree_entry, rb_node); prev = *n;
prev = n; entry = rb_entry(prev, struct tree_entry, rb_node);
prev_entry = entry; prev_entry = entry;
if (offset < entry->start) if (offset < entry->start)
n = n->rb_left; n = &(*n)->rb_left;
else if (offset > entry->end) else if (offset > entry->end)
n = n->rb_right; n = &(*n)->rb_right;
else else
return n; return *n;
} }
if (p_ret)
*p_ret = n;
if (parent_ret)
*parent_ret = prev;
if (prev_ret) { if (prev_ret) {
orig_prev = prev; orig_prev = prev;
while (prev && offset > prev_entry->end) { while (prev && offset > prev_entry->end) {
...@@ -292,18 +308,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, ...@@ -292,18 +308,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
return NULL; return NULL;
} }
static inline struct rb_node *tree_search(struct extent_io_tree *tree, static inline struct rb_node *
u64 offset) tree_search_for_insert(struct extent_io_tree *tree,
u64 offset,
struct rb_node ***p_ret,
struct rb_node **parent_ret)
{ {
struct rb_node *prev = NULL; struct rb_node *prev = NULL;
struct rb_node *ret; struct rb_node *ret;
ret = __etree_search(tree, offset, &prev, NULL); ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
if (!ret) if (!ret)
return prev; return prev;
return ret; return ret;
} }
static inline struct rb_node *tree_search(struct extent_io_tree *tree,
u64 offset)
{
return tree_search_for_insert(tree, offset, NULL, NULL);
}
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other) struct extent_state *other)
{ {
...@@ -385,6 +410,8 @@ static void set_state_bits(struct extent_io_tree *tree, ...@@ -385,6 +410,8 @@ static void set_state_bits(struct extent_io_tree *tree,
*/ */
static int insert_state(struct extent_io_tree *tree, static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end, struct extent_state *state, u64 start, u64 end,
struct rb_node ***p,
struct rb_node **parent,
unsigned long *bits) unsigned long *bits)
{ {
struct rb_node *node; struct rb_node *node;
...@@ -397,7 +424,7 @@ static int insert_state(struct extent_io_tree *tree, ...@@ -397,7 +424,7 @@ static int insert_state(struct extent_io_tree *tree,
set_state_bits(tree, state, bits); set_state_bits(tree, state, bits);
node = tree_insert(&tree->state, end, &state->rb_node); node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
if (node) { if (node) {
struct extent_state *found; struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node); found = rb_entry(node, struct extent_state, rb_node);
...@@ -444,7 +471,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, ...@@ -444,7 +471,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
prealloc->state = orig->state; prealloc->state = orig->state;
orig->start = split; orig->start = split;
node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
NULL, NULL);
if (node) { if (node) {
free_extent_state(prealloc); free_extent_state(prealloc);
return -EEXIST; return -EEXIST;
...@@ -783,6 +811,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -783,6 +811,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state; struct extent_state *state;
struct extent_state *prealloc = NULL; struct extent_state *prealloc = NULL;
struct rb_node *node; struct rb_node *node;
struct rb_node **p;
struct rb_node *parent;
int err = 0; int err = 0;
u64 last_start; u64 last_start;
u64 last_end; u64 last_end;
...@@ -809,11 +839,12 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -809,11 +839,12 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
*/ */
node = tree_search(tree, start); node = tree_search_for_insert(tree, start, &p, &parent);
if (!node) { if (!node) {
prealloc = alloc_extent_state_atomic(prealloc); prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc); BUG_ON(!prealloc);
err = insert_state(tree, prealloc, start, end, &bits); err = insert_state(tree, prealloc, start, end,
&p, &parent, &bits);
if (err) if (err)
extent_io_tree_panic(tree, err); extent_io_tree_panic(tree, err);
...@@ -920,7 +951,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -920,7 +951,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* the later extent. * the later extent.
*/ */
err = insert_state(tree, prealloc, start, this_end, err = insert_state(tree, prealloc, start, this_end,
&bits); NULL, NULL, &bits);
if (err) if (err)
extent_io_tree_panic(tree, err); extent_io_tree_panic(tree, err);
...@@ -1006,6 +1037,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1006,6 +1037,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state; struct extent_state *state;
struct extent_state *prealloc = NULL; struct extent_state *prealloc = NULL;
struct rb_node *node; struct rb_node *node;
struct rb_node **p;
struct rb_node *parent;
int err = 0; int err = 0;
u64 last_start; u64 last_start;
u64 last_end; u64 last_end;
...@@ -1033,14 +1066,15 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1033,14 +1066,15 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
*/ */
node = tree_search(tree, start); node = tree_search_for_insert(tree, start, &p, &parent);
if (!node) { if (!node) {
prealloc = alloc_extent_state_atomic(prealloc); prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) { if (!prealloc) {
err = -ENOMEM; err = -ENOMEM;
goto out; goto out;
} }
err = insert_state(tree, prealloc, start, end, &bits); err = insert_state(tree, prealloc, start, end,
&p, &parent, &bits);
if (err) if (err)
extent_io_tree_panic(tree, err); extent_io_tree_panic(tree, err);
cache_state(prealloc, cached_state); cache_state(prealloc, cached_state);
...@@ -1137,7 +1171,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1137,7 +1171,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* the later extent. * the later extent.
*/ */
err = insert_state(tree, prealloc, start, this_end, err = insert_state(tree, prealloc, start, this_end,
&bits); NULL, NULL, &bits);
if (err) if (err)
extent_io_tree_panic(tree, err); extent_io_tree_panic(tree, err);
cache_state(prealloc, cached_state); cache_state(prealloc, cached_state);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment