Commit debf12d5 authored by Artem Bityutskiy

UBIFS: substitute the replay tree with a replay list

This patch simplifies replay even further - it removes the replay tree and
adds the replay list instead. Indeed, we just do not need to use a tree here -
all we need to do is to add all nodes to the list and then sort it. Using an
RB-tree is overkill - more code and slower. And since we replay buds in
order, we expect the nodes to follow in _mostly_ sorted order, so the merge
sort becomes much cheaper on average than an RB-tree.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent 074bcb9b
...@@ -33,22 +33,24 @@ ...@@ -33,22 +33,24 @@
*/ */
#include "ubifs.h" #include "ubifs.h"
#include <linux/list_sort.h>
/** /**
* struct replay_entry - replay tree entry. * struct replay_entry - replay list entry.
* @lnum: logical eraseblock number of the node * @lnum: logical eraseblock number of the node
* @offs: node offset * @offs: node offset
* @len: node length * @len: node length
* @deletion: non-zero if this entry corresponds to a node deletion * @deletion: non-zero if this entry corresponds to a node deletion
* @sqnum: node sequence number * @sqnum: node sequence number
* @rb: links the replay tree * @list: links the replay list
* @key: node key * @key: node key
* @nm: directory entry name * @nm: directory entry name
* @old_size: truncation old size * @old_size: truncation old size
* @new_size: truncation new size * @new_size: truncation new size
* *
* UBIFS journal replay must compare node sequence numbers, which means it must * The replay process first scans all buds and builds the replay list, then
* build a tree of node information to insert into the TNC. * sorts the replay list in nodes sequence number order, and then inserts all
* the replay entries to the TNC.
*/ */
struct replay_entry { struct replay_entry {
int lnum; int lnum;
...@@ -56,7 +58,7 @@ struct replay_entry { ...@@ -56,7 +58,7 @@ struct replay_entry {
int len; int len;
unsigned int deletion:1; unsigned int deletion:1;
unsigned long long sqnum; unsigned long long sqnum;
struct rb_node rb; struct list_head list;
union ubifs_key key; union ubifs_key key;
union { union {
struct qstr nm; struct qstr nm;
...@@ -263,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) ...@@ -263,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
} }
/** /**
* destroy_replay_tree - destroy the replay. * replay_entries_cmp - compare 2 replay entries.
* @c: UBIFS file-system description object * @priv: UBIFS file-system description object
* @a: first replay entry
* @b: second replay entry
* *
* Destroy the replay tree. * This is a comparison function for 'list_sort()' which compares 2 replay
* entries @a and @b by comparing their sequence number. Returns %1 if @a has
* greater sequence number and %-1 otherwise.
*/ */
static void destroy_replay_tree(struct ubifs_info *c) static int replay_entries_cmp(void *priv, struct list_head *a,
struct list_head *b)
{ {
struct rb_node *this = c->replay_tree.rb_node; struct replay_entry *ra, *rb;
struct replay_entry *r;
while (this) { cond_resched();
if (this->rb_left) { if (a == b)
this = this->rb_left; return 0;
continue;
} else if (this->rb_right) { ra = list_entry(a, struct replay_entry, list);
this = this->rb_right; rb = list_entry(b, struct replay_entry, list);
continue; ubifs_assert(ra->sqnum != rb->sqnum);
} if (ra->sqnum > rb->sqnum)
r = rb_entry(this, struct replay_entry, rb); return 1;
this = rb_parent(this); return -1;
if (this) {
if (this->rb_left == &r->rb)
this->rb_left = NULL;
else
this->rb_right = NULL;
}
if (is_hash_key(c, &r->key))
kfree(r->nm.name);
kfree(r);
}
c->replay_tree = RB_ROOT;
} }
/** /**
* apply_replay_tree - apply the replay tree to the TNC. * apply_replay_list - apply the replay list to the TNC.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* *
* Apply the replay tree. * Apply all entries in the replay list to the TNC. Returns zero in case of
* Returns zero in case of success and a negative error code in case of * success and a negative error code in case of failure.
* failure.
*/ */
static int apply_replay_tree(struct ubifs_info *c) static int apply_replay_list(struct ubifs_info *c)
{ {
struct rb_node *this = rb_first(&c->replay_tree);
while (this) {
struct replay_entry *r; struct replay_entry *r;
int err; int err;
list_sort(c, &c->replay_list, &replay_entries_cmp);
list_for_each_entry(r, &c->replay_list, list) {
cond_resched(); cond_resched();
r = rb_entry(this, struct replay_entry, rb);
err = apply_replay_entry(c, r); err = apply_replay_entry(c, r);
if (err) if (err)
return err; return err;
this = rb_next(this);
} }
return 0; return 0;
} }
/** /**
* insert_node - insert a node to the replay tree. * destroy_replay_list - destroy the replay list.
* @c: UBIFS file-system description object
*
* Destroy the replay list.
*/
static void destroy_replay_list(struct ubifs_info *c)
{
	struct replay_entry *entry, *next;

	/*
	 * Entries are unlinked and freed while walking the list, so the
	 * "safe" iterator, which remembers the next element up front, is
	 * required here.
	 */
	list_for_each_entry_safe(entry, next, &c->replay_list, list) {
		/* Detach the entry before any of its memory is released */
		list_del(&entry->list);

		/* Entries with a hash key also own their name buffer */
		if (is_hash_key(c, &entry->key))
			kfree(entry->nm.name);

		kfree(entry);
	}
}
/**
* insert_node - insert a node to the replay list
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* @lnum: node logical eraseblock number * @lnum: node logical eraseblock number
* @offs: node offset * @offs: node offset
...@@ -336,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) ...@@ -336,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
* @old_size: truncation old size * @old_size: truncation old size
* @new_size: truncation new size * @new_size: truncation new size
* *
* This function inserts a scanned non-direntry node to the replay tree. The * This function inserts a scanned non-direntry node to the replay list. The
* replay tree is an RB-tree containing @struct replay_entry elements which are * replay list contains @struct replay_entry elements, and we sort this list in
* indexed by the sequence number. The replay tree is applied at the very end * sequence number order before applying it. The replay list is applied at the
* of the replay process. Since the tree is sorted in sequence number order, * very end of the replay process. Since the list is sorted in sequence number
* the older modifications are applied first. This function returns zero in * order, the older modifications are applied first. This function returns zero
* case of success and a negative error code in case of failure. * in case of success and a negative error code in case of failure.
*/ */
static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, unsigned long long sqnum, union ubifs_key *key, unsigned long long sqnum,
int deletion, int *used, loff_t old_size, int deletion, int *used, loff_t old_size,
loff_t new_size) loff_t new_size)
{ {
struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r; struct replay_entry *r;
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
if (key_inum(c, key) >= c->highest_inum) if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key); c->highest_inum = key_inum(c, key);
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
while (*p) {
parent = *p;
r = rb_entry(parent, struct replay_entry, rb);
if (sqnum < r->sqnum) {
p = &(*p)->rb_left;
continue;
} else if (sqnum > r->sqnum) {
p = &(*p)->rb_right;
continue;
}
ubifs_err("duplicate sqnum in replay");
return -EINVAL;
}
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r) if (!r)
return -ENOMEM; return -ENOMEM;
...@@ -384,13 +381,12 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, ...@@ -384,13 +381,12 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
r->old_size = old_size; r->old_size = old_size;
r->new_size = new_size; r->new_size = new_size;
rb_link_node(&r->rb, parent, p); list_add_tail(&r->list, &c->replay_list);
rb_insert_color(&r->rb, &c->replay_tree);
return 0; return 0;
} }
/** /**
* insert_dent - insert a directory entry node into the replay tree. * insert_dent - insert a directory entry node into the replay list.
* @c: UBIFS file-system description object * @c: UBIFS file-system description object
* @lnum: node logical eraseblock number * @lnum: node logical eraseblock number
* @offs: node offset * @offs: node offset
...@@ -402,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, ...@@ -402,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
* @deletion: non-zero if this is a deletion * @deletion: non-zero if this is a deletion
* @used: number of bytes in use in a LEB * @used: number of bytes in use in a LEB
* *
* This function inserts a scanned directory entry node to the replay tree. * This function inserts a scanned directory entry node or an extended
* Returns zero in case of success and a negative error code in case of * attribute entry to the replay list. Returns zero in case of success and a
* failure. * negative error code in case of failure.
*
* This function is also used for extended attribute entries because they are
* implemented as directory entry nodes.
*/ */
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, const char *name, int nlen, union ubifs_key *key, const char *name, int nlen,
unsigned long long sqnum, int deletion, int *used) unsigned long long sqnum, int deletion, int *used)
{ {
struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r; struct replay_entry *r;
char *nbuf; char *nbuf;
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
if (key_inum(c, key) >= c->highest_inum) if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key); c->highest_inum = key_inum(c, key);
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
while (*p) {
parent = *p;
r = rb_entry(parent, struct replay_entry, rb);
if (sqnum < r->sqnum) {
p = &(*p)->rb_left;
continue;
}
if (sqnum > r->sqnum) {
p = &(*p)->rb_right;
continue;
}
ubifs_err("duplicate sqnum in replay");
return -EINVAL;
}
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r) if (!r)
return -ENOMEM; return -ENOMEM;
nbuf = kmalloc(nlen + 1, GFP_KERNEL); nbuf = kmalloc(nlen + 1, GFP_KERNEL);
if (!nbuf) { if (!nbuf) {
kfree(r); kfree(r);
...@@ -458,9 +436,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, ...@@ -458,9 +436,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
nbuf[nlen] = '\0'; nbuf[nlen] = '\0';
r->nm.name = nbuf; r->nm.name = nbuf;
ubifs_assert(!*p); list_add_tail(&r->list, &c->replay_list);
rb_link_node(&r->rb, parent, p);
rb_insert_color(&r->rb, &c->replay_tree);
return 0; return 0;
} }
...@@ -1017,7 +993,7 @@ int ubifs_replay_journal(struct ubifs_info *c) ...@@ -1017,7 +993,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
if (err) if (err)
goto out; goto out;
err = apply_replay_tree(c); err = apply_replay_list(c);
if (err) if (err)
goto out; goto out;
...@@ -1039,7 +1015,7 @@ int ubifs_replay_journal(struct ubifs_info *c) ...@@ -1039,7 +1015,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
(unsigned long)c->highest_inum); (unsigned long)c->highest_inum);
out: out:
destroy_replay_tree(c); destroy_replay_list(c);
destroy_bud_list(c); destroy_bud_list(c);
c->replaying = 0; c->replaying = 0;
return err; return err;
......
...@@ -1205,7 +1205,6 @@ struct ubifs_debug_info; ...@@ -1205,7 +1205,6 @@ struct ubifs_debug_info;
* @replaying: %1 during journal replay * @replaying: %1 during journal replay
* @mounting: %1 while mounting * @mounting: %1 while mounting
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_tree: temporary tree used during journal replay
* @replay_list: temporary list used during journal replay * @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay * @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node) * @cs_sqnum: sequence number of first node in the log (commit start node)
...@@ -1435,7 +1434,6 @@ struct ubifs_info { ...@@ -1435,7 +1434,6 @@ struct ubifs_info {
unsigned int replaying:1; unsigned int replaying:1;
unsigned int mounting:1; unsigned int mounting:1;
unsigned int remounting_rw:1; unsigned int remounting_rw:1;
struct rb_root replay_tree;
struct list_head replay_list; struct list_head replay_list;
struct list_head replay_buds; struct list_head replay_buds;
unsigned long long cs_sqnum; unsigned long long cs_sqnum;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment