Commit 9512c47e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "Some fixes for btrfs send/recv and fsync from Filipe and Robbie Ko.

  Bonus points to Filipe for already having xfstests in place for many
  of these"

* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: remove unused function btrfs_add_delayed_qgroup_reserve()
  Btrfs: improve performance on fsync against new inode after rename/unlink
  Btrfs: be more precise on errors when getting an inode from disk
  Btrfs: send, don't bug on inconsistent snapshots
  Btrfs: send, avoid incorrect leaf accesses when sending utimes operations
  Btrfs: send, fix invalid leaf accesses due to incorrect utimes operations
  Btrfs: send, fix warning due to late freeing of orphan_dir_info structures
  Btrfs: incremental send, fix premature rmdir operations
  Btrfs: incremental send, fix invalid paths for rename operations
  Btrfs: send, add missing error check for calls to path_loop()
  Btrfs: send, fix failure to move directories with the same name around
  Btrfs: add missing check for writeback errors on fsync
parents 315581a2 10838816
......@@ -862,33 +862,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
return 0;
}
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 ref_root, u64 bytenr, u64 num_bytes)
{
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_head *ref_head;
int ret = 0;
if (!fs_info->quota_enabled || !is_fstree(ref_root))
return 0;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
if (!ref_head) {
ret = -ENOENT;
goto out;
}
WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
ref_head->qgroup_ref_root = ref_root;
ref_head->qgroup_reserved = num_bytes;
out:
spin_unlock(&delayed_refs->lock);
return ret;
}
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
......
......@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 ref_root, u64 bytenr, u64 num_bytes);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
......
......@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
/*
* An ordered extent might have started before and completed
* already with io errors, in which case the inode was not
* updated and we end up here. So check the inode's mapping
* flags for any errors that might have happened while doing
* writeback of file data.
*/
ret = btrfs_inode_check_errors(inode);
inode_unlock(inode);
goto out;
}
......
......@@ -3435,10 +3435,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
ret = PTR_ERR_OR_ZERO(inode);
if (ret && ret != -ESTALE)
if (ret && ret != -ENOENT)
goto out;
if (ret == -ESTALE && root == root->fs_info->tree_root) {
if (ret == -ENOENT && root == root->fs_info->tree_root) {
struct btrfs_root *dead_root;
struct btrfs_fs_info *fs_info = root->fs_info;
int is_dead_root = 0;
......@@ -3474,7 +3474,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
*/
if (ret == -ESTALE) {
if (ret == -ENOENT) {
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
......@@ -3633,7 +3633,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/*
* read an inode from the btree into the in-memory inode
*/
static void btrfs_read_locked_inode(struct inode *inode)
static int btrfs_read_locked_inode(struct inode *inode)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
......@@ -3652,14 +3652,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
if (!path)
if (!path) {
ret = -ENOMEM;
goto make_bad;
}
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret)
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto make_bad;
}
leaf = path->nodes[0];
......@@ -3812,11 +3817,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
}
btrfs_update_iflags(inode);
return;
return 0;
make_bad:
btrfs_free_path(path);
make_bad_inode(inode);
return ret;
}
/*
......@@ -4204,6 +4210,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
......@@ -4226,11 +4233,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
if (!err)
if (!err) {
btrfs_i_size_write(inode, 0);
/*
* Propagate the last_unlink_trans value of the deleted dir to
* its parent directory. This is to prevent an unrecoverable
* log tree in the case we do something like this:
* 1) create dir foo
* 2) create snapshot under dir foo
* 3) delete the snapshot
* 4) rmdir foo
* 5) mkdir foo
* 6) fsync foo or some file inside foo
*/
if (last_unlink_trans >= trans->transid)
BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
}
out:
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
......@@ -5606,7 +5629,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
btrfs_read_locked_inode(inode);
int ret;
ret = btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
unlock_new_inode(inode);
......@@ -5615,7 +5640,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
} else {
unlock_new_inode(inode);
iput(inode);
inode = ERR_PTR(-ESTALE);
ASSERT(ret < 0);
inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
}
}
......
......@@ -231,7 +231,6 @@ struct pending_dir_move {
u64 parent_ino;
u64 ino;
u64 gen;
bool is_orphan;
struct list_head update_refs;
};
......@@ -274,6 +273,39 @@ struct name_cache_entry {
char name[];
};
static void inconsistent_snapshot_error(struct send_ctx *sctx,
enum btrfs_compare_tree_result result,
const char *what)
{
const char *result_string;
switch (result) {
case BTRFS_COMPARE_TREE_NEW:
result_string = "new";
break;
case BTRFS_COMPARE_TREE_DELETED:
result_string = "deleted";
break;
case BTRFS_COMPARE_TREE_CHANGED:
result_string = "updated";
break;
case BTRFS_COMPARE_TREE_SAME:
ASSERT(0);
result_string = "unchanged";
break;
default:
ASSERT(0);
result_string = "unexpected";
}
btrfs_err(sctx->send_root->fs_info,
"Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
result_string, what, sctx->cmp_key->objectid,
sctx->send_root->root_key.objectid,
(sctx->parent_root ?
sctx->parent_root->root_key.objectid : 0));
}
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
......@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* was already unlinked/moved, so we can safely assume that we will not
* overwrite anything at this point in time.
*/
if (other_inode > sctx->send_progress) {
if (other_inode > sctx->send_progress ||
is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, NULL, NULL, NULL, NULL);
if (ret < 0)
......@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
......@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
}
if (loc.objectid > send_progress) {
struct orphan_dir_info *odi;
odi = get_orphan_dir_info(sctx, dir);
free_orphan_dir_info(sctx, odi);
ret = 0;
goto out;
}
......@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
......@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
return NULL;
}
static int path_loop(struct send_ctx *sctx, struct fs_path *name,
u64 ino, u64 gen, u64 *ancestor_ino)
{
int ret = 0;
u64 parent_inode = 0;
u64 parent_gen = 0;
u64 start_ino = ino;
*ancestor_ino = 0;
while (ino != BTRFS_FIRST_FREE_OBJECTID) {
fs_path_reset(name);
if (is_waiting_for_rm(sctx, ino))
break;
if (is_waiting_for_move(sctx, ino)) {
if (*ancestor_ino == 0)
*ancestor_ino = ino;
ret = get_first_ref(sctx->parent_root, ino,
&parent_inode, &parent_gen, name);
} else {
ret = __get_cur_name_and_parent(sctx, ino, gen,
&parent_inode,
&parent_gen, name);
if (ret > 0) {
ret = 0;
break;
}
}
if (ret < 0)
break;
if (parent_inode == start_ino) {
ret = 1;
if (*ancestor_ino == 0)
*ancestor_ino = ino;
break;
}
ino = parent_inode;
gen = parent_gen;
}
return ret;
}
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
struct fs_path *from_path = NULL;
......@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
u64 parent_ino, parent_gen;
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
u64 ancestor;
bool is_orphan;
int ret;
name = fs_path_alloc();
......@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
rmdir_ino = dm->rmdir_ino;
is_orphan = dm->orphanized;
free_waiting_dir_move(sctx, dm);
if (pm->is_orphan) {
if (is_orphan) {
ret = gen_unique_name(sctx, pm->ino,
pm->gen, from_path);
} else {
......@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
goto out;
sctx->send_progress = sctx->cur_ino + 1;
ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
if (ret < 0)
goto out;
if (ret) {
LIST_HEAD(deleted_refs);
ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
&pm->update_refs, &deleted_refs,
is_orphan);
if (ret < 0)
goto out;
if (rmdir_ino) {
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
dm->rmdir_ino = rmdir_ino;
}
goto out;
}
fs_path_reset(name);
to_path = name;
name = NULL;
......@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
/* already deleted */
goto finish;
}
ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
......@@ -3204,8 +3305,18 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
* and old parent(s).
*/
list_for_each_entry(cur, &pm->update_refs, list) {
if (cur->dir == rmdir_ino)
/*
* The parent inode might have been deleted in the send snapshot
*/
ret = get_inode_info(sctx->send_root, cur->dir, NULL,
NULL, NULL, NULL, NULL, NULL);
if (ret == -ENOENT) {
ret = 0;
continue;
}
if (ret < 0)
goto out;
ret = send_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
......@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
u64 left_gen;
u64 right_gen;
int ret = 0;
struct waiting_dir_move *wdm;
if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
return 0;
......@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
goto out;
}
if (is_waiting_for_move(sctx, di_key.objectid)) {
wdm = get_waiting_dir_move(sctx, di_key.objectid);
if (wdm && !wdm->orphanized) {
ret = add_pending_dir_move(sctx,
sctx->cur_ino,
sctx->cur_inode_gen,
......@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
ret = is_ancestor(sctx->parent_root,
sctx->cur_ino, sctx->cur_inode_gen,
ino, path_before);
break;
if (ret)
break;
}
fs_path_reset(path_before);
......@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
goto out;
if (ret) {
struct name_cache_entry *nce;
struct waiting_dir_move *wdm;
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
goto out;
/*
* If ow_inode has its rename operation delayed
* make sure that its orphanized name is used in
* the source path when performing its rename
* operation.
*/
if (is_waiting_for_move(sctx, ow_inode)) {
wdm = get_waiting_dir_move(sctx,
ow_inode);
ASSERT(wdm);
wdm->orphanized = true;
}
/*
* Make sure we clear our orphanized inode's
* name from the name cache. This is because the
......@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
name_cache_delete(sctx, nce);
kfree(nce);
}
/*
* ow_inode might currently be an ancestor of
* cur_ino, therefore compute valid_path (the
* current path of cur_ino) again because it
* might contain the pre-orphanization name of
* ow_inode, which is no longer valid.
*/
fs_path_reset(valid_path);
ret = get_cur_path(sctx, sctx->cur_ino,
sctx->cur_inode_gen, valid_path);
if (ret < 0)
goto out;
} else {
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
......@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
{
int ret = 0;
BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
if (sctx->cur_ino != sctx->cmp_key->objectid) {
inconsistent_snapshot_error(sctx, result, "reference");
return -EIO;
}
if (!sctx->cur_inode_new_gen &&
sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
......@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
{
int ret = 0;
BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
if (sctx->cur_ino != sctx->cmp_key->objectid) {
inconsistent_snapshot_error(sctx, result, "xattr");
return -EIO;
}
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
......@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
{
int ret = 0;
BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
if (sctx->cur_ino != sctx->cmp_key->objectid) {
inconsistent_snapshot_error(sctx, result, "extent");
return -EIO;
}
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
......
......@@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
struct inode *inode)
struct inode *inode,
u64 *other_ino)
{
int ret;
struct btrfs_path *search_path;
......@@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
search_path, parent,
name, this_name_len, 0);
if (di && !IS_ERR(di)) {
ret = 1;
struct btrfs_key di_key;
btrfs_dir_item_key_to_cpu(search_path->nodes[0],
di, &di_key);
if (di_key.type == BTRFS_INODE_ITEM_KEY) {
ret = 1;
*other_ino = di_key.objectid;
} else {
ret = -EAGAIN;
}
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
......@@ -4722,16 +4732,71 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
BTRFS_I(inode)->generation == trans->transid) {
u64 other_ino = 0;
ret = btrfs_check_ref_name_override(path->nodes[0],
path->slots[0],
&min_key, inode);
&min_key, inode,
&other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
} else if (ret > 0) {
err = 1;
btrfs_set_log_full_commit(root->fs_info, trans);
goto out_unlock;
struct btrfs_key inode_key;
struct inode *other_inode;
if (ins_nr > 0) {
ins_nr++;
} else {
ins_nr = 1;
ins_start_slot = path->slots[0];
}
ret = copy_items(trans, inode, dst_path, path,
&last_extent, ins_start_slot,
ins_nr, inode_only,
logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
}
ins_nr = 0;
btrfs_release_path(path);
inode_key.objectid = other_ino;
inode_key.type = BTRFS_INODE_ITEM_KEY;
inode_key.offset = 0;
other_inode = btrfs_iget(root->fs_info->sb,
&inode_key, root,
NULL);
/*
* If the other inode that had a conflicting dir
* entry was deleted in the current transaction,
* we don't need to do more work nor fallback to
* a transaction commit.
*/
if (IS_ERR(other_inode) &&
PTR_ERR(other_inode) == -ENOENT) {
goto next_key;
} else if (IS_ERR(other_inode)) {
err = PTR_ERR(other_inode);
goto out_unlock;
}
/*
* We are safe logging the other inode without
* acquiring its i_mutex as long as we log with
* the LOG_INODE_EXISTS mode. We're safe against
* concurrent renames of the other inode as well
* because during a rename we pin the log and
* update the log with the new name before we
* unpin it.
*/
err = btrfs_log_inode(trans, root, other_inode,
LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
iput(other_inode);
if (err)
goto out_unlock;
else
goto next_key;
}
}
......@@ -4799,7 +4864,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ins_nr = 0;
}
btrfs_release_path(path);
next_key:
if (min_key.offset < (u64)-1) {
min_key.offset++;
} else if (min_key.type < max_key.type) {
......@@ -4993,8 +5058,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
break;
if (IS_ROOT(parent))
if (IS_ROOT(parent)) {
inode = d_inode(parent);
if (btrfs_must_commit_transaction(trans, inode))
ret = 1;
break;
}
parent = dget_parent(parent);
dput(old_parent);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment