Commit f7e9e8fc authored by Omar Sandoval, committed by David Sterba

Btrfs: stop creating orphan items for truncate

Currently, we insert an orphan item during a truncate so that if there's
a crash, we don't leak extents past the on-disk i_size. However, since
commit 7f4f6e0a ("Btrfs: only update disk_i_size as we remove
extents"), we keep disk_i_size in sync with the extent items as we
truncate, so orphan cleanup will never have any extents to remove. Don't
bother with the superfluous orphan item.
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 05522109
...@@ -253,10 +253,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, ...@@ -253,10 +253,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
truncate_pagecache(inode, 0); truncate_pagecache(inode, 0);
/* /*
* We don't need an orphan item because truncating the free space cache * We skip the throttling logic for free space cache inodes, so we don't
* will never be split across transactions. * need to check for -EAGAIN.
* We don't need to check for -EAGAIN because we're a free space
* cache inode
*/ */
ret = btrfs_truncate_inode_items(trans, root, inode, ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY); 0, BTRFS_EXTENT_DATA_KEY);
......
...@@ -3346,8 +3346,8 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, ...@@ -3346,8 +3346,8 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
} }
/* /*
* This creates an orphan entry for the given inode in case something goes * This creates an orphan entry for the given inode in case something goes wrong
* wrong in the middle of an unlink/truncate. * in the middle of an unlink.
* *
* NOTE: caller of this function should reserve 5 units of metadata for * NOTE: caller of this function should reserve 5 units of metadata for
* this function. * this function.
...@@ -3410,7 +3410,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, ...@@ -3410,7 +3410,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
} }
} }
/* insert an orphan item to track this unlinked/truncated file */ /* insert an orphan item to track this unlinked file */
if (insert) { if (insert) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) { if (ret) {
...@@ -3439,8 +3439,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, ...@@ -3439,8 +3439,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
} }
/* /*
* We have done the truncate/delete so we can go ahead and remove the orphan * We have done the delete so we can go ahead and remove the orphan item for
* item for this particular inode. * this particular inode.
*/ */
static int btrfs_orphan_del(struct btrfs_trans_handle *trans, static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode) struct btrfs_inode *inode)
...@@ -3484,7 +3484,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3484,7 +3484,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct inode *inode; struct inode *inode;
u64 last_objectid = 0; u64 last_objectid = 0;
int ret = 0, nr_unlink = 0, nr_truncate = 0; int ret = 0, nr_unlink = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return 0; return 0;
...@@ -3584,12 +3584,31 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3584,12 +3584,31 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
key.offset = found_key.objectid - 1; key.offset = found_key.objectid - 1;
continue; continue;
} }
} }
/* /*
* Inode is already gone but the orphan item is still there, * If we have an inode with links, there are a couple of
* kill the orphan item. * possibilities. Old kernels (before v3.12) used to create an
* orphan item for truncate indicating that there were possibly
* extent items past i_size that needed to be deleted. In v3.12,
* truncate was changed to update i_size in sync with the extent
* items, but the (useless) orphan item was still created. Since
* v4.18, we don't create the orphan item for truncate at all.
*
* So, this item could mean that we need to do a truncate, but
* only if this filesystem was last used on a pre-v3.12 kernel
* and was not cleanly unmounted. The odds of that are quite
* slim, and it's a pain to do the truncate now, so just delete
* the orphan item.
*
* It's also possible that this orphan item was supposed to be
* deleted but wasn't. The inode number may have been reused,
* but either way, we can delete the orphan item.
*/ */
if (ret == -ENOENT) { if (ret == -ENOENT || inode->i_nlink) {
if (!ret)
iput(inode);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
...@@ -3613,34 +3632,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3613,34 +3632,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
&BTRFS_I(inode)->runtime_flags); &BTRFS_I(inode)->runtime_flags);
atomic_inc(&root->orphan_inodes); atomic_inc(&root->orphan_inodes);
/* if we have links, this was a truncate, lets do that */ nr_unlink++;
if (inode->i_nlink) {
if (WARN_ON(!S_ISREG(inode->i_mode))) {
iput(inode);
continue;
}
nr_truncate++;
/* 1 for the orphan item deletion. */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
iput(inode);
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret) {
iput(inode);
goto out;
}
ret = btrfs_truncate(inode, false);
if (ret)
btrfs_orphan_del(NULL, BTRFS_I(inode));
} else {
nr_unlink++;
}
/* this will do delete_inode and everything for us */ /* this will do delete_inode and everything for us */
iput(inode); iput(inode);
...@@ -3665,8 +3657,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ...@@ -3665,8 +3657,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
if (nr_unlink) if (nr_unlink)
btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink); btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
if (nr_truncate)
btrfs_debug(fs_info, "truncated %d orphans", nr_truncate);
out: out:
if (ret) if (ret)
...@@ -5350,29 +5340,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) ...@@ -5350,29 +5340,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags); &BTRFS_I(inode)->runtime_flags);
/*
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion.
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
/*
* We need to do this in case we fail at _any_ point during the
* actual truncate. Once we do the truncate_setsize we could
* invalidate pages which forces any outstanding ordered io to
* be instantly completed which will give us extents that need
* to be truncated. If we fail to get an orphan inode down we
* could have left over extents that were never meant to live,
* so we need to guarantee from this point on that everything
* will be consistent.
*/
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret)
return ret;
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the end less truncate */ /* Disable nonlocked read DIO to avoid the end less truncate */
...@@ -5384,29 +5351,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) ...@@ -5384,29 +5351,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) { if (ret && inode->i_nlink) {
int err; int err;
/* To get a stable disk_i_size */
err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err) {
btrfs_orphan_del(NULL, BTRFS_I(inode));
return err;
}
/* /*
* failed to truncate, disk_i_size is only adjusted down * Truncate failed, so fix up the in-memory size. We
* as we remove extents, so it should represent the true * adjusted disk_i_size down as we removed extents, so
* size of the inode, so reset the in memory size and * wait for disk_i_size to be stable and then update the
* delete our orphan entry. * in-memory size to match.
*/ */
trans = btrfs_join_transaction(root); err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, BTRFS_I(inode));
return ret;
}
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
err = btrfs_orphan_del(trans, BTRFS_I(inode));
if (err) if (err)
btrfs_abort_transaction(trans, err); return err;
btrfs_end_transaction(trans); i_size_write(inode, BTRFS_I(inode)->disk_i_size);
} }
} }
...@@ -9224,39 +9178,31 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) ...@@ -9224,39 +9178,31 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
} }
/* /*
* Yes ladies and gentlemen, this is indeed ugly. The fact is we have * Yes ladies and gentlemen, this is indeed ugly. We have a couple of
* 3 things going on here * things going on here:
*
* 1) We need to reserve space for our orphan item and the space to
* delete our orphan item. Lord knows we don't want to have a dangling
* orphan item because we didn't reserve space to remove it.
* *
* 2) We need to reserve space to update our inode. * 1) We need to reserve space to update our inode.
* *
* 3) We need to have something to cache all the space that is going to * 2) We need to have something to cache all the space that is going to
* be free'd up by the truncate operation, but also have some slack * be free'd up by the truncate operation, but also have some slack
* space reserved in case it uses space during the truncate (thank you * space reserved in case it uses space during the truncate (thank you
* very much snapshotting). * very much snapshotting).
* *
* And we need these to all be separate. The fact is we can use a lot of * And we need these to be separate. The fact is we can use a lot of
* space doing the truncate, and we have no earthly idea how much space * space doing the truncate, and we have no earthly idea how much space
* we will use, so we need the truncate reservation to be separate so it * we will use, so we need the truncate reservation to be separate so it
* doesn't end up using space reserved for updating the inode or * doesn't end up using space reserved for updating the inode. We also
* removing the orphan item. We also need to be able to stop the * need to be able to stop the transaction and start a new one, which
* transaction and start a new one, which means we need to be able to * means we need to be able to update the inode several times, and we
* update the inode several times, and we have no idea of knowing how * have no idea of knowing how many times that will be, so we can't just
* many times that will be, so we can't just reserve 1 item for the * reserve 1 item for the entirety of the operation, so that has to be
* entirety of the operation, so that has to be done separately as well. * done separately as well.
* Then there is the orphan item, which does indeed need to be held on
* to for the whole operation, and we need nobody to touch this reserved
* space except the orphan code.
* *
* So that leaves us with * So that leaves us with
* *
* 1) root->orphan_block_rsv - for the orphan deletion. * 1) rsv - for the truncate reservation, which we will steal from the
* 2) rsv - for the truncate reservation, which we will steal from the
* transaction reservation. * transaction reservation.
* 3) fs_info->trans_block_rsv - this will have 1 items worth left for * 2) fs_info->trans_block_rsv - this will have 1 items worth left for
* updating the inode. * updating the inode.
*/ */
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
...@@ -9345,13 +9291,6 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) ...@@ -9345,13 +9291,6 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
btrfs_ordered_update_i_size(inode, inode->i_size, NULL); btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
} }
if (ret == 0 && inode->i_nlink > 0) {
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
if (ret)
err = ret;
}
if (trans) { if (trans) {
trans->block_rsv = &fs_info->trans_block_rsv; trans->block_rsv = &fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode); ret = btrfs_update_inode(trans, root, inode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment