Commit 5eecb9cc authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "I held off on my rc5 pull because I hit an oops during log recovery
  after a crash.  I wanted to make sure it wasn't a regression because
  we have some logging fixes in here.

  It turns out that a commit during the merge window just made it much
  more likely to trigger directory logging instead of full commits,
  which exposed an old bug.

  The new backref walking code got some additional fixes.  This should
  be the final set of them.

  Josef fixed up a corner where our O_DIRECT writes and buffered reads
  could expose old file contents (not stale, just not the most recent).
  He and Liu Bo fixed crashes during tree log recovery as well.

  Ilya fixed errors while we resume disk balancing operations on
  readonly mounts."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: run delayed directory updates during log replay
  Btrfs: hold a ref on the inode during writepages
  Btrfs: fix tree log remove space corner case
  Btrfs: fix wrong check during log recovery
  Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS
  Btrfs: resume balance on rw (re)mounts properly
  Btrfs: restore restriper state on all mounts
  Btrfs: fix dio write vs buffered read race
  Btrfs: don't count I/O statistic read errors for missing devices
  Btrfs: resolve tree mod log locking issue in btrfs_next_leaf
  Btrfs: fix tree mod log rewind of ADD operations
  Btrfs: leave critical region in btrfs_find_all_roots as soon as possible
  Btrfs: always put insert_ptr modifications into the tree mod log
  Btrfs: fix tree mod log for root replacements at leaf level
  Btrfs: support root level changes in __resolve_indirect_ref
  Btrfs: avoid waiting for delayed refs when we must not
parents 62ad6449 b6305567
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 
 	eb = path->nodes[level];
-	if (!eb) {
-		WARN_ON(1);
-		ret = 1;
-		goto out;
+	while (!eb) {
+		if (!level) {
+			WARN_ON(1);
+			ret = 1;
+			goto out;
+		}
+		level--;
+		eb = path->nodes[level];
 	}
 
 	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			}
 			ret = __add_delayed_refs(head, delayed_ref_seq,
 						 &prefs_delayed);
+			mutex_unlock(&head->mutex);
 			if (ret) {
 				spin_unlock(&delayed_refs->lock);
 				goto out;
@@ -928,8 +933,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	}
 
 out:
-	if (head)
-		mutex_unlock(&head->mutex);
 	btrfs_free_path(path);
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 		if (!looped && !tm)
 			return 0;
 		/*
-		 * we must have key remove operations in the log before the
-		 * replace operation.
+		 * if there are no tree operation for the oldest root, we simply
+		 * return it. this should only happen if that (old) root is at
+		 * level 0.
 		 */
-		BUG_ON(!tm);
+		if (!tm)
+			break;
+
+		/*
+		 * if there's an operation that's not a root replacement, we
+		 * found the oldest version of our root. normally, we'll find a
+		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+		 */
 		if (tm->op != MOD_LOG_ROOT_REPLACE)
 			break;
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 					      tm->generation);
 			break;
 		case MOD_LOG_KEY_ADD:
-			if (tm->slot != n - 1) {
-				o_dst = btrfs_node_key_ptr_offset(tm->slot);
-				o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-				memmove_extent_buffer(eb, o_dst, o_src, p_size);
-			}
+			/* if a move operation is needed it's in the log */
 			n--;
 			break;
 		case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	}
 
 	tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-
-	/*
-	 * there was an item in the log when __tree_mod_log_oldest_root
-	 * returned. this one must not go away, because the time_seq passed to
-	 * us must be blocking its removal.
-	 */
-	BUG_ON(!tm);
-
 	if (old_root)
-		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
-					       root->nodesize);
+		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
 	else
 		eb = btrfs_clone_extent_buffer(root->node);
 	btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 		btrfs_set_header_level(eb, old_root->level);
 		btrfs_set_header_generation(eb, old_generation);
 	}
-	__tree_mod_log_rewind(eb, time_seq, tm);
+	if (tm)
+		__tree_mod_log_rewind(eb, time_seq, tm);
+	else
+		WARN_ON(btrfs_header_level(eb) != 0);
 	extent_buffer_get(eb);
 
 	return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *path,
 		       struct btrfs_disk_key *key, u64 bytenr,
-		       int slot, int level, int tree_mod_log)
+		       int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	BUG_ON(slot > nritems);
 	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
 	if (slot != nritems) {
-		if (tree_mod_log && level)
+		if (level)
 			tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
 					     slot, nritems - slot);
 		memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 			      btrfs_node_key_ptr_offset(slot),
 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
 	}
-	if (tree_mod_log && level) {
+	if (level) {
 		ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
 					      MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(split);
 
 	insert_ptr(trans, root, path, &disk_key, split->start,
-		   path->slots[level + 1] + 1, level + 1, 1);
+		   path->slots[level + 1] + 1, level + 1);
 
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 	btrfs_set_header_nritems(l, mid);
 	btrfs_item_key(right, &disk_key, 0);
 	insert_ptr(trans, root, path, &disk_key, right->start,
-		   path->slots[1] + 1, 1, 0);
+		   path->slots[1] + 1, 1);
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			if (mid <= slot) {
 				btrfs_set_header_nritems(right, 0);
 				insert_ptr(trans, root, path, &disk_key, right->start,
-					   path->slots[1] + 1, 1, 0);
+					   path->slots[1] + 1, 1);
 				btrfs_tree_unlock(path->nodes[0]);
 				free_extent_buffer(path->nodes[0]);
 				path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			} else {
 				btrfs_set_header_nritems(right, 0);
 				insert_ptr(trans, root, path, &disk_key, right->start,
-					   path->slots[1], 1, 0);
+					   path->slots[1], 1);
 				btrfs_tree_unlock(path->nodes[0]);
 				free_extent_buffer(path->nodes[0]);
 				path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 
 		if (!path->skip_locking) {
 			ret = btrfs_try_tree_read_lock(next);
+			if (!ret && time_seq) {
+				/*
+				 * If we don't get the lock, we may be racing
+				 * with push_leaf_left, holding that lock while
+				 * itself waiting for the leaf we've currently
+				 * locked. To solve this situation, we give up
+				 * on our lock and cycle.
+				 */
+				btrfs_release_path(path);
+				cond_resched();
+				goto again;
+			}
 			if (!ret) {
 				btrfs_set_path_blocking(path);
 				btrfs_tree_read_lock(next);
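The comment in that last btrfs_next_old_leaf hunk describes a classic ABBA situation: we hold a leaf lock that push_leaf_left wants, while it may hold the lock on next that we want. The fix takes the standard way out: try the lock without blocking and, on failure, drop everything we hold and start over. A rough userspace analogue of the same give-up-and-cycle pattern with pthreads (illustrative names only, not btrfs code):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

/*
 * Take lock_a, then try lock_b without blocking.  If lock_b is busy,
 * another task may be holding it while waiting for lock_a, so instead
 * of blocking (and deadlocking), release lock_a, yield, and start over
 * -- the same move btrfs_next_old_leaf makes with
 * btrfs_release_path()/cond_resched()/goto again.
 */
static void lock_both_safely(void)
{
again:
	pthread_mutex_lock(&lock_a);
	if (pthread_mutex_trylock(&lock_b) != 0) {
		pthread_mutex_unlock(&lock_a);	/* give up what we hold */
		sched_yield();			/* let the other task finish */
		goto again;
	}
	/* ... critical section holding both locks ... */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
}

int main(void)
{
	lock_both_safely();
	puts("acquired and released both locks");
	return 0;
}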
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ int open_ctree(struct super_block *sb,
 					  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
 		goto recovery_tree_root;
 	csum_root->track_dirty = 1;
 
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
 
+	ret = btrfs_recover_balance(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to recover balance\n");
+		goto fail_block_groups;
+	}
+
 	ret = btrfs_init_dev_stats(fs_info);
 	if (ret) {
 		printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ int open_ctree(struct super_block *sb,
 		goto fail_trans_kthread;
 	}
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		down_read(&fs_info->cleanup_work_sem);
-		err = btrfs_orphan_cleanup(fs_info->fs_root);
-		if (!err)
-			err = btrfs_orphan_cleanup(fs_info->tree_root);
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	down_read(&fs_info->cleanup_work_sem);
+	if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+	    (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
 		up_read(&fs_info->cleanup_work_sem);
+		close_ctree(tree_root);
+		return ret;
+	}
+	up_read(&fs_info->cleanup_work_sem);
 
-		if (!err)
-			err = btrfs_recover_balance(fs_info->tree_root);
-
-		if (err) {
-			close_ctree(tree_root);
-			return err;
-		}
+	ret = btrfs_resume_balance_async(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to resume balance\n");
+		close_ctree(tree_root);
+		return ret;
 	}
 
 	return 0;
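These two hunks split balance handling in open_ctree() into two phases: btrfs_recover_balance() runs early and synchronously, only loading the on-disk balance item into memory (safe even for read-only mounts), while btrfs_resume_balance_async() kicks off the actual work in a kthread once the filesystem is known to be writable. A hedged userspace sketch of that load-early/act-later split; all names here are made up stand-ins for the btrfs pieces:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the recovered balance state. */
struct fs_state {
	bool has_pending_work;	/* set by the recover step */
	bool read_only;
};

/* Synchronous "recover": just load persisted state; fine on ro mounts. */
static int recover_state(struct fs_state *fs)
{
	fs->has_pending_work = true;	/* pretend we found a balance item */
	return 0;
}

/* Async "resume": do the actual work in a background thread. */
static void *worker(void *data)
{
	struct fs_state *fs = data;

	if (fs->has_pending_work)
		puts("continuing balance in background");
	return NULL;
}

/* Returns 1 if a worker was started, 0 if there was nothing to do. */
static int resume_async(struct fs_state *fs, pthread_t *tsk)
{
	if (!fs->has_pending_work || fs->read_only)
		return 0;	/* nothing to resume (yet) */
	if (pthread_create(tsk, NULL, worker, fs))
		return -1;
	return 1;
}

int main(void)
{
	struct fs_state fs = { .read_only = false };
	pthread_t tsk;
	int started;

	if (recover_state(&fs))
		return 1;
	started = resume_async(&fs, &tsk);
	if (started < 0)
		return 1;
	if (started)
		pthread_join(tsk, NULL);
	return 0;
}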
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 	return count;
 }
 
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs)
+			       unsigned long num_refs,
+			       struct list_head *first_seq)
 {
-	struct list_head *first_seq = delayed_refs->seq_head.next;
-
 	spin_unlock(&delayed_refs->lock);
 	pr_debug("waiting for more refs (num %ld, first %p)\n",
 		 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
+	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 				 */
 				consider_waiting = 1;
 				num_refs = delayed_refs->num_entries;
+				first_seq = root->fs_info->tree_mod_seq_list.next;
 			} else {
-				wait_for_more_refs(delayed_refs, num_refs);
+				wait_for_more_refs(delayed_refs,
						   num_refs, first_seq);
 				/*
 				 * after waiting, things have changed. we
 				 * dropped the lock and someone else might have
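The point of the wait_for_more_refs() change is subtle: the first sequence element to wait on is now captured under the lock at the moment we decide waiting is needed, instead of being re-read inside the wait after the list may have grown. A minimal sketch of that snapshot-then-wait pattern with a pthread condition variable (illustrative only; the kernel code uses its own wait queue and a list of seq elements rather than a counter):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static unsigned long seq;	/* stand-in for the seq list head */

/* Producer: finishing work advances the sequence and wakes waiters. */
static void *producer(void *arg)
{
	(void)arg;
	usleep(1000);
	pthread_mutex_lock(&lock);
	seq++;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/*
 * Consumer: snapshot the value we decided to wait on *while still
 * holding the lock*, then wait until that snapshot goes stale.
 * Re-reading the live head inside the wait (as the old
 * wait_for_more_refs() effectively did) can latch onto an element that
 * appeared after the decision, one that may never go away while we sleep.
 */
static void wait_for_progress(void)
{
	pthread_mutex_lock(&lock);
	unsigned long first_seq = seq;	/* snapshot at decision time */
	while (seq == first_seq)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	wait_for_progress();
	pthread_join(t, NULL);
	puts("progress observed");
	return 0;
}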
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 			     writepage_t writepage, void *data,
 			     void (*flush_fn)(void *))
 {
+	struct inode *inode = mapping->host;
 	int ret = 0;
 	int done = 0;
 	int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	int scanned = 0;
 	int tag;
 
+	/*
+	 * We have to hold onto the inode so that ordered extents can do their
+	 * work when the IO finishes. The alternative to this is failing to add
+	 * an ordered extent if the igrab() fails there and that is a huge pain
+	 * to deal with, so instead just hold onto the inode throughout the
+	 * writepages operation. If it fails here we are freeing up the inode
+	 * anyway and we'd rather not waste our time writing out stuff that is
+	 * going to be truncated anyway.
+	 */
+	if (!igrab(inode))
+		return 0;
+
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 		index = 0;
 		goto retry;
 	}
+	btrfs_add_delayed_iput(inode);
 	return ret;
 }
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    loff_t *ppos, size_t count, size_t ocount)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = fdentry(file)->d_inode;
 	struct iov_iter i;
 	ssize_t written;
 	ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
 					    count, ocount);
 
-	/*
-	 * the generic O_DIRECT will update in-memory i_size after the
-	 * DIOs are done.  But our endio handlers that update the on
-	 * disk i_size never update past the in memory i_size.  So we
-	 * need one more update here to catch any additions to the
-	 * file
-	 */
-	if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-		mark_inode_dirty(inode);
-	}
-
 	if (written < 0 || written == count)
 		return written;
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
 	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
 
 	/*
-	 * XXX - this can go away after a few releases.
-	 *
-	 * since the only user of btrfs_remove_free_space is the tree logging
-	 * stuff, and the only way to test that is under crash conditions, we
-	 * want to have this debug stuff here just in case somethings not
-	 * working.  Search the bitmap for the space we are trying to use to
-	 * make sure its actually there.  If its not there then we need to stop
-	 * because something has gone wrong.
+	 * We need to search for bits in this bitmap.  We could only cover some
+	 * of the extent in this bitmap thanks to how we add space, so we need
+	 * to search for as much as it as we can and clear that amount, and then
+	 * go searching for the next bit.
 	 */
 	search_start = *offset;
-	search_bytes = *bytes;
+	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
 	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
 	BUG_ON(ret < 0 || search_start != *offset);
 
-	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
-		*bytes -= end - *offset + 1;
-		*offset = end + 1;
-	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-		*bytes = 0;
-	}
+	/* We may have found more bits than what we need */
+	search_bytes = min(search_bytes, *bytes);
+
+	/* Cannot clear past the end of the bitmap */
+	search_bytes = min(search_bytes, end - search_start + 1);
+
+	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+	*offset += search_bytes;
+	*bytes -= search_bytes;
 
 	if (*bytes) {
 		struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
 		 * everything over again.
 		 */
 		search_start = *offset;
-		search_bytes = *bytes;
+		search_bytes = ctl->unit;
 		ret = search_bitmap(ctl, bitmap_info, &search_start,
 				    &search_bytes);
 		if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *info;
-	struct btrfs_free_space *next_info = NULL;
 	int ret = 0;
 
 	spin_lock(&ctl->tree_lock);
 
 again:
+	if (!bytes)
+		goto out_lock;
+
 	info = tree_search_offset(ctl, offset, 0, 0);
 	if (!info) {
 		/*
@@ -1905,88 +1904,48 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 		}
 	}
 
-	if (info->bytes < bytes && rb_next(&info->offset_index)) {
-		u64 end;
-		next_info = rb_entry(rb_next(&info->offset_index),
-					     struct btrfs_free_space,
-					     offset_index);
-
-		if (next_info->bitmap)
-			end = next_info->offset +
-			      BITS_PER_BITMAP * ctl->unit - 1;
-		else
-			end = next_info->offset + next_info->bytes;
-
-		if (next_info->bytes < bytes ||
-		    next_info->offset > offset || offset > end) {
-			printk(KERN_CRIT "Found free space at %llu, size %llu,"
-			      " trying to use %llu\n",
-			      (unsigned long long)info->offset,
-			      (unsigned long long)info->bytes,
-			      (unsigned long long)bytes);
-			WARN_ON(1);
-			ret = -EINVAL;
-			goto out_lock;
-		}
-
-		info = next_info;
-	}
-
-	if (info->bytes == bytes) {
+	if (!info->bitmap) {
 		unlink_free_space(ctl, info);
-		if (info->bitmap) {
-			kfree(info->bitmap);
-			ctl->total_bitmaps--;
-		}
-		kmem_cache_free(btrfs_free_space_cachep, info);
-		ret = 0;
-		goto out_lock;
-	}
-
-	if (!info->bitmap && info->offset == offset) {
-		unlink_free_space(ctl, info);
-		info->offset += bytes;
-		info->bytes -= bytes;
-		ret = link_free_space(ctl, info);
-		WARN_ON(ret);
-		goto out_lock;
-	}
+		if (offset == info->offset) {
+			u64 to_free = min(bytes, info->bytes);
+
+			info->bytes -= to_free;
+			info->offset += to_free;
+			if (info->bytes) {
+				ret = link_free_space(ctl, info);
+				WARN_ON(ret);
+			} else {
+				kmem_cache_free(btrfs_free_space_cachep, info);
+			}
 
-	if (!info->bitmap && info->offset <= offset &&
-	    info->offset + info->bytes >= offset + bytes) {
-		u64 old_start = info->offset;
-		/*
-		 * we're freeing space in the middle of the info,
-		 * this can happen during tree log replay
-		 *
-		 * first unlink the old info and then
-		 * insert it again after the hole we're creating
-		 */
-		unlink_free_space(ctl, info);
-		if (offset + bytes < info->offset + info->bytes) {
-			u64 old_end = info->offset + info->bytes;
+			offset += to_free;
+			bytes -= to_free;
+			goto again;
+		} else {
+			u64 old_end = info->bytes + info->offset;
 
-			info->offset = offset + bytes;
-			info->bytes = old_end - info->offset;
+			info->bytes = offset - info->offset;
 			ret = link_free_space(ctl, info);
 			WARN_ON(ret);
 			if (ret)
 				goto out_lock;
-		} else {
-			/* the hole we're creating ends at the end
-			 * of the info struct, just free the info
-			 */
-			kmem_cache_free(btrfs_free_space_cachep, info);
-		}
-		spin_unlock(&ctl->tree_lock);
 
-		/* step two, insert a new info struct to cover
-		 * anything before the hole
-		 */
-		ret = btrfs_add_free_space(block_group, old_start,
-					   offset - old_start);
-		WARN_ON(ret); /* -ENOMEM */
-		goto out;
+			/* Not enough bytes in this entry to satisfy us */
+			if (old_end < offset + bytes) {
+				bytes -= old_end - offset;
+				offset = old_end;
+				goto again;
+			} else if (old_end == offset + bytes) {
+				/* all done */
+				goto out_lock;
+			}
+			spin_unlock(&ctl->tree_lock);
+
+			ret = btrfs_add_free_space(block_group, offset + bytes,
+						   old_end - (offset + bytes));
+			WARN_ON(ret);
+			goto out;
+		}
 	}
 
 	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
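The rewritten btrfs_remove_free_space() no longer insists that one free-space entry cover the whole request; it clips the request against whatever entry it landed in, clears that much, advances offset/bytes, and loops (the goto again). A toy userspace version of just the clip-and-advance step, assuming a deliberately simplified entry type (not the real btrfs_free_space):

#include <stdio.h>

/* Toy free-space entry covering [offset, offset + bytes). */
struct extent {
	unsigned long long offset;
	unsigned long long bytes;
};

/*
 * Remove as much of [offset, offset + bytes) as this one entry covers,
 * returning how much was actually freed so the caller can advance and
 * retry against the next entry -- the same clip/advance/loop shape the
 * rewritten btrfs_remove_free_space() uses.
 */
static unsigned long long clip_front(struct extent *e,
				     unsigned long long offset,
				     unsigned long long bytes)
{
	unsigned long long end = e->offset + e->bytes;
	unsigned long long to_free;

	if (offset < e->offset || offset >= end)
		return 0;			/* request not in this entry */
	to_free = end - offset < bytes ? end - offset : bytes;
	if (offset == e->offset) {		/* trim from the front */
		e->offset += to_free;
		e->bytes -= to_free;
	} else {				/* punch: keep the front part */
		e->bytes = offset - e->offset;
		/* a real implementation re-adds [offset + to_free, end) */
	}
	return to_free;
}

int main(void)
{
	struct extent e = { .offset = 0, .bytes = 4096 };
	unsigned long long offset = 1024, bytes = 8192;
	unsigned long long freed = clip_front(&e, offset, bytes);

	printf("freed %llu, %llu bytes still to remove\n",
	       freed, bytes - freed);
	return 0;
}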
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	if (root->fs_info->log_root_recovering) {
-		BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+		BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 				 &BTRFS_I(inode)->runtime_flags));
 		goto no_delete;
 	}
@@ -5876,8 +5876,17 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	bh_result->b_size = len;
 	bh_result->b_bdev = em->bdev;
 	set_buffer_mapped(bh_result);
-	if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-		set_buffer_new(bh_result);
+	if (create) {
+		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+			set_buffer_new(bh_result);
+
+		/*
+		 * Need to update the i_size under the extent lock so buffered
+		 * readers will get the updated i_size when we unlock.
+		 */
+		if (start + len > i_size_read(inode))
+			i_size_write(inode, start + len);
+	}
 
 	free_extent_map(em);
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		 */
 		ordered = btrfs_lookup_ordered_range(inode, lockstart,
 						     lockend - lockstart + 1);
-		if (!ordered)
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent.  The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    cached_state)))
 			break;
+
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 				     &cached_state, GFP_NOFS);
-		btrfs_start_ordered_extent(inode, ordered, 1);
-		btrfs_put_ordered_extent(ordered);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(file->f_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				goto out;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(file->f_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret) {
+				if (ret == -EBUSY)
+					ret = 0;
+				goto out;
+			}
+		}
+
 		cond_resched();
 	}
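The race Josef fixed pairs an O_DIRECT writer with a concurrent buffered reader of the same range; the i_size update under the extent lock plus the explicit flush/invalidate above are what keep the two views coherent. A small userspace program showing the two access paths involved (the filename is arbitrary and the program only demonstrates the interleaving; the fix itself lives in the kernel):

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* O_DIRECT needs an aligned buffer; 4096 covers common sectors. */
	char *buf;
	char check[16];
	ssize_t n;
	int dfd, bfd;

	if (posix_memalign((void **)&buf, 4096, 4096))
		return 1;
	memset(buf, 'x', 4096);

	dfd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	bfd = open("testfile", O_RDONLY);	/* buffered reader */
	if (dfd < 0 || bfd < 0)
		return 1;

	if (pwrite(dfd, buf, 4096, 0) != 4096)	/* direct write */
		return 1;

	n = pread(bfd, check, sizeof(check), 0);	/* buffered read */
	printf("read %zd bytes, first byte %c\n", n, n > 0 ? check[0] : '?');

	close(dfd);
	close(bfd);
	free(buf);
	return 0;
}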
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
 				   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
 #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
 				struct btrfs_ioctl_scrub_args)
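The _IOR/_IOW macros encode the data-flow direction, from userspace's point of view, into the ioctl command number: _IOR means the caller reads data out of the kernel, which is exactly what SUBVOL_GETFLAGS does with its __u64, so _IOW was the wrong macro (and switching it changes the generated command value). A minimal caller sketch, re-deriving the constant locally (BTRFS_IOCTL_MAGIC is 0x94 in ioctl.h; the path argument is up to the caller and must name a btrfs subvolume):

#include <fcntl.h>
#include <linux/types.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* As defined after the fix; headers built before it used _IOW here,
 * which yields a different command number. */
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)

int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY);
	__u64 flags;

	if (fd < 0 || ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0) {
		perror("BTRFS_IOC_SUBVOL_GETFLAGS");
		return 1;
	}
	printf("subvol flags: 0x%llx\n", (unsigned long long)flags);
	close(fd);
	return 0;
}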
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		if (ret)
 			goto restore;
 
+		ret = btrfs_resume_balance_async(fs_info);
+		if (ret)
+			goto restore;
+
 		sb->s_flags &= ~MS_RDONLY;
 	}
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 	kfree(name);
 
 	iput(inode);
+
+	btrfs_run_delayed_items(trans, root);
 	return ret;
 }
@@ -895,6 +897,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 				ret = btrfs_unlink_inode(trans, root, dir,
 							 inode, victim_name,
 							 victim_name_len);
+				btrfs_run_delayed_items(trans, root);
 			}
 			kfree(victim_name);
 			ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
 			ret = btrfs_unlink_inode(trans, root, dir, inode,
 						 name, name_len);
 			BUG_ON(ret);
+
+			btrfs_run_delayed_items(trans, root);
+
 			kfree(name);
 			iput(inode);
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 
 static int balance_kthread(void *data)
 {
-	struct btrfs_balance_control *bctl =
-			(struct btrfs_balance_control *)data;
-	struct btrfs_fs_info *fs_info = bctl->fs_info;
+	struct btrfs_fs_info *fs_info = data;
 	int ret = 0;
 
 	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
 
-	set_balance_control(bctl);
-
-	if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-		printk(KERN_INFO "btrfs: force skipping balance\n");
-	} else {
+	if (fs_info->balance_ctl) {
 		printk(KERN_INFO "btrfs: continuing balance\n");
-		ret = btrfs_balance(bctl, NULL);
+		ret = btrfs_balance(fs_info->balance_ctl, NULL);
 	}
 
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
+
 	return ret;
 }
 
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
 	struct task_struct *tsk;
+
+	spin_lock(&fs_info->balance_lock);
+	if (!fs_info->balance_ctl) {
+		spin_unlock(&fs_info->balance_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->balance_lock);
+
+	if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+		printk(KERN_INFO "btrfs: force skipping balance\n");
+		return 0;
+	}
+
+	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+	if (IS_ERR(tsk))
+		return PTR_ERR(tsk);
+
+	return 0;
+}
+
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
 	struct btrfs_balance_control *bctl;
 	struct btrfs_balance_item *item;
 	struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
 	if (!path)
 		return -ENOMEM;
 
-	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
-	if (!bctl) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	key.objectid = BTRFS_BALANCE_OBJECTID;
 	key.type = BTRFS_BALANCE_ITEM_KEY;
 	key.offset = 0;
 
-	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
 	if (ret < 0)
-		goto out_bctl;
+		goto out;
 	if (ret > 0) { /* ret = -ENOENT; */
 		ret = 0;
-		goto out_bctl;
+		goto out;
+	}
+
+	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+	if (!bctl) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	leaf = path->nodes[0];
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
 
-	bctl->fs_info = tree_root->fs_info;
-	bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+	bctl->fs_info = fs_info;
+	bctl->flags = btrfs_balance_flags(leaf, item);
+	bctl->flags |= BTRFS_BALANCE_RESUME;
 
 	btrfs_balance_data(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
 	btrfs_balance_sys(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-	tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
-	if (IS_ERR(tsk))
-		ret = PTR_ERR(tsk);
-	else
-		goto out;
-
-out_bctl:
-	kfree(bctl);
+	mutex_lock(&fs_info->volume_mutex);
+	mutex_lock(&fs_info->balance_mutex);
 
+	set_balance_control(bctl);
+
+	mutex_unlock(&fs_info->balance_mutex);
+	mutex_unlock(&fs_info->volume_mutex);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
 			BUG_ON(stripe_index >= bbio->num_stripes);
 			dev = bbio->stripes[stripe_index].dev;
-			if (bio->bi_rw & WRITE)
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_WRITE_ERRS);
-			else
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_READ_ERRS);
-			if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_FLUSH_ERRS);
-			btrfs_dev_stat_print_on_error(dev);
+			if (dev->bdev) {
+				if (bio->bi_rw & WRITE)
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_WRITE_ERRS);
+				else
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_READ_ERRS);
+				if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_FLUSH_ERRS);
+				btrfs_dev_stat_print_on_error(dev);
+			}
 		}
 	}
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
 int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);