Commit 8d19514f authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "We've got corner cases for updating i_size that ceph was hitting,
  error handling for quotas when we run out of space, a very subtle
  snapshot deletion race, a crash while removing devices, and one
  deadlock between subvolume creation and the sb_internal code (thanks
  lockdep)."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: move d_instantiate outside the transaction during mksubvol
  Btrfs: fix EDQUOT handling in btrfs_delalloc_reserve_metadata
  Btrfs: fix possible stale data exposure
  Btrfs: fix missing i_size update
  Btrfs: fix race between snapshot deletion and getting inode
  Btrfs: fix missing release of the space/qgroup reservation in start_transaction()
  Btrfs: fix wrong sync_writers decrement in btrfs_file_aio_write()
  Btrfs: do not merge logged extents if we've removed them from the tree
  btrfs: don't try to notify udev about missing devices
parents 95436ada 1a65e24b
...@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
unsigned nr_extents = 0; unsigned nr_extents = 0;
int extra_reserve = 0; int extra_reserve = 0;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret; int ret = 0;
bool delalloc_lock = true; bool delalloc_lock = true;
/* If we are a free space inode we need to not flush since we will be in /* If we are a free space inode we need to not flush since we will be in
...@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) ...@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
csum_bytes = BTRFS_I(inode)->csum_bytes; csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock); spin_unlock(&BTRFS_I(inode)->lock);
if (root->fs_info->quota_enabled) { if (root->fs_info->quota_enabled)
ret = btrfs_qgroup_reserve(root, num_bytes + ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize); nr_extents * root->leafsize);
if (ret) {
spin_lock(&BTRFS_I(inode)->lock);
calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
}
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); /*
* ret != 0 here means the qgroup reservation failed, we go straight to
* the shared error handling then.
*/
if (ret == 0)
ret = reserve_metadata_bytes(root, block_rsv,
to_reserve, flush);
if (ret) { if (ret) {
u64 to_free = 0; u64 to_free = 0;
unsigned dropped; unsigned dropped;
......
...@@ -288,6 +288,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, ...@@ -288,6 +288,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{ {
clear_bit(EXTENT_FLAG_LOGGING, &em->flags); clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
if (em->in_tree)
try_merge_map(tree, em); try_merge_map(tree, em);
} }
......
...@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, ...@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct btrfs_key key; struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range; struct btrfs_ioctl_defrag_range_args range;
int num_defrag; int num_defrag;
int index;
int ret;
/* get the inode */ /* get the inode */
key.objectid = defrag->root; key.objectid = defrag->root;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1; key.offset = (u64)-1;
index = srcu_read_lock(&fs_info->subvol_srcu);
inode_root = btrfs_read_fs_root_no_name(fs_info, &key); inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(inode_root)) { if (IS_ERR(inode_root)) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag); ret = PTR_ERR(inode_root);
return PTR_ERR(inode_root); goto cleanup;
}
if (btrfs_root_refs(&inode_root->root_item) == 0) {
ret = -ENOENT;
goto cleanup;
} }
key.objectid = defrag->ino; key.objectid = defrag->ino;
...@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, ...@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
key.offset = 0; key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag); ret = PTR_ERR(inode);
return PTR_ERR(inode); goto cleanup;
} }
srcu_read_unlock(&fs_info->subvol_srcu, index);
/* do a chunk of defrag */ /* do a chunk of defrag */
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
...@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, ...@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
iput(inode); iput(inode);
return 0; return 0;
cleanup:
srcu_read_unlock(&fs_info->subvol_srcu, index);
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
return ret;
} }
/* /*
...@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err < 0 && num_written > 0) if (err < 0 && num_written > 0)
num_written = err; num_written = err;
} }
out:
if (sync) if (sync)
atomic_dec(&BTRFS_I(inode)->sync_writers); atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
sb_end_write(inode->i_sb); sb_end_write(inode->i_sb);
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
return num_written ? num_written : err; return num_written ? num_written : err;
......
...@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root, ...@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
BUG_ON(ret); BUG_ON(ret);
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail: fail:
if (async_transid) { if (async_transid) {
*async_transid = trans->transid; *async_transid = trans->transid;
...@@ -525,6 +524,10 @@ static noinline int create_subvol(struct btrfs_root *root, ...@@ -525,6 +524,10 @@ static noinline int create_subvol(struct btrfs_root *root,
} }
if (err && !ret) if (err && !ret)
ret = err; ret = err;
if (!ret)
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
return ret; return ret;
} }
......
...@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, ...@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
* if the disk i_size is already at the inode->i_size, or * if the disk i_size is already at the inode->i_size, or
* this ordered extent is inside the disk i_size, we're done * this ordered extent is inside the disk i_size, we're done
*/ */
if (disk_i_size == i_size || offset <= disk_i_size) { if (disk_i_size == i_size)
goto out;
/*
* We still need to update disk_i_size if outstanding_isize is greater
* than disk_i_size.
*/
if (offset <= disk_i_size &&
(!ordered || ordered->outstanding_isize <= disk_i_size))
goto out; goto out;
}
/* /*
* walk backward from this ordered extent to disk_i_size. * walk backward from this ordered extent to disk_i_size.
...@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, ...@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
break; break;
if (test->file_offset >= i_size) if (test->file_offset >= i_size)
break; break;
if (test->file_offset >= disk_i_size) { if (entry_end(test) > disk_i_size) {
/* /*
* we don't update disk_i_size now, so record this * we don't update disk_i_size now, so record this
* undealt i_size. Or we will not know the real * undealt i_size. Or we will not know the real
......
...@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) ...@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
int corrected = 0; int corrected = 0;
struct btrfs_key key; struct btrfs_key key;
struct inode *inode = NULL; struct inode *inode = NULL;
struct btrfs_fs_info *fs_info;
u64 end = offset + PAGE_SIZE - 1; u64 end = offset + PAGE_SIZE - 1;
struct btrfs_root *local_root; struct btrfs_root *local_root;
int srcu_index;
key.objectid = root; key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY; key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1; key.offset = (u64)-1;
local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
if (IS_ERR(local_root)) fs_info = fixup->root->fs_info;
srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
local_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(local_root)) {
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root); return PTR_ERR(local_root);
}
key.type = BTRFS_INODE_ITEM_KEY; key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum; key.objectid = inum;
key.offset = 0; key.offset = 0;
inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
...@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) ...@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
} }
if (PageUptodate(page)) { if (PageUptodate(page)) {
struct btrfs_fs_info *fs_info;
if (PageDirty(page)) { if (PageDirty(page)) {
/* /*
* we need to write the data to the defect sector. the * we need to write the data to the defect sector. the
...@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) ...@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
u64 physical_for_dev_replace; u64 physical_for_dev_replace;
u64 len; u64 len;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
int srcu_index;
key.objectid = root; key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY; key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1; key.offset = (u64)-1;
srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
local_root = btrfs_read_fs_root_no_name(fs_info, &key); local_root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(local_root)) if (IS_ERR(local_root)) {
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
return PTR_ERR(local_root); return PTR_ERR(local_root);
}
key.type = BTRFS_INODE_ITEM_KEY; key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum; key.objectid = inum;
key.offset = 0; key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
......
...@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, ...@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
&root->fs_info->trans_block_rsv, &root->fs_info->trans_block_rsv,
num_bytes, flush); num_bytes, flush);
if (ret) if (ret)
return ERR_PTR(ret); goto reserve_fail;
} }
again: again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h) if (!h) {
return ERR_PTR(-ENOMEM); ret = -ENOMEM;
goto alloc_fail;
}
/* /*
* If we are JOIN_NOLOCK we're already committing a transaction and * If we are JOIN_NOLOCK we're already committing a transaction and
...@@ -365,11 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, ...@@ -365,11 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
if (ret < 0) { if (ret < 0) {
/* We must get the transaction if we are JOIN_NOLOCK. */ /* We must get the transaction if we are JOIN_NOLOCK. */
BUG_ON(type == TRANS_JOIN_NOLOCK); BUG_ON(type == TRANS_JOIN_NOLOCK);
goto join_fail;
if (type < TRANS_JOIN_NOLOCK)
sb_end_intwrite(root->fs_info->sb);
kmem_cache_free(btrfs_trans_handle_cachep, h);
return ERR_PTR(ret);
} }
cur_trans = root->fs_info->running_transaction; cur_trans = root->fs_info->running_transaction;
...@@ -410,6 +408,19 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, ...@@ -410,6 +408,19 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
if (!current->journal_info && type != TRANS_USERSPACE) if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h; current->journal_info = h;
return h; return h;
join_fail:
if (type < TRANS_JOIN_NOLOCK)
sb_end_intwrite(root->fs_info->sb);
kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
if (num_bytes)
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
num_bytes);
reserve_fail:
if (qgroup_reserved)
btrfs_qgroup_free(root, qgroup_reserved);
return ERR_PTR(ret);
} }
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
......
...@@ -1556,6 +1556,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) ...@@ -1556,6 +1556,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = 0; ret = 0;
/* Notify udev that device has changed */ /* Notify udev that device has changed */
if (bdev)
btrfs_kobject_uevent(bdev, KOBJ_CHANGE); btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
error_brelse: error_brelse:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment