Btrfs: stop creating orphan items for truncate

Currently, we insert an orphan item during a truncate so that if there's
a crash, we don't leak extents past the on-disk i_size. However, since
commit 7f4f6e0a3f ("Btrfs: only update disk_i_size as we remove
extents"), we keep disk_i_size in sync with the extent items as we
truncate, so orphan cleanup will never have any extents to remove. Don't
bother with the superfluous orphan item.

Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Omar Sandoval 2018-05-11 13:13:32 -07:00 committed by David Sterba
parent 0552210997
commit f7e9e8fc79
2 changed files with 51 additions and 114 deletions

View File

@ -253,10 +253,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
truncate_pagecache(inode, 0);
/*
* We don't need an orphan item because truncating the free space cache
* will never be split across transactions.
* We don't need to check for -EAGAIN because we're a free space
* cache inode
* We skip the throttling logic for free space cache inodes, so we don't
* need to check for -EAGAIN.
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);

View File

@ -3346,8 +3346,8 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
}
/*
* This creates an orphan entry for the given inode in case something goes
* wrong in the middle of an unlink/truncate.
* This creates an orphan entry for the given inode in case something goes wrong
* in the middle of an unlink.
*
* NOTE: caller of this function should reserve 5 units of metadata for
* this function.
@ -3410,7 +3410,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
}
}
/* insert an orphan item to track this unlinked/truncated file */
/* insert an orphan item to track this unlinked file */
if (insert) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
@ -3439,8 +3439,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
}
/*
* We have done the truncate/delete so we can go ahead and remove the orphan
* item for this particular inode.
* We have done the delete so we can go ahead and remove the orphan item for
* this particular inode.
*/
static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
@ -3484,7 +3484,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
struct inode *inode;
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
int ret = 0, nr_unlink = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return 0;
@ -3584,12 +3584,31 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
key.offset = found_key.objectid - 1;
continue;
}
}
/*
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
* If we have an inode with links, there are a couple of
* possibilities. Old kernels (before v3.12) used to create an
* orphan item for truncate indicating that there were possibly
* extent items past i_size that needed to be deleted. In v3.12,
* truncate was changed to update i_size in sync with the extent
* items, but the (useless) orphan item was still created. Since
* v4.18, we don't create the orphan item for truncate at all.
*
* So, this item could mean that we need to do a truncate, but
* only if this filesystem was last used on a pre-v3.12 kernel
* and was not cleanly unmounted. The odds of that are quite
* slim, and it's a pain to do the truncate now, so just delete
* the orphan item.
*
* It's also possible that this orphan item was supposed to be
* deleted but wasn't. The inode number may have been reused,
* but either way, we can delete the orphan item.
*/
if (ret == -ENOENT) {
if (ret == -ENOENT || inode->i_nlink) {
if (!ret)
iput(inode);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@ -3613,34 +3632,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
&BTRFS_I(inode)->runtime_flags);
atomic_inc(&root->orphan_inodes);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
if (WARN_ON(!S_ISREG(inode->i_mode))) {
iput(inode);
continue;
}
nr_truncate++;
/* 1 for the orphan item deletion. */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
iput(inode);
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret) {
iput(inode);
goto out;
}
ret = btrfs_truncate(inode, false);
if (ret)
btrfs_orphan_del(NULL, BTRFS_I(inode));
} else {
nr_unlink++;
}
nr_unlink++;
/* this will do delete_inode and everything for us */
iput(inode);
@ -3665,8 +3657,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
if (nr_unlink)
btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
if (nr_truncate)
btrfs_debug(fs_info, "truncated %d orphans", nr_truncate);
out:
if (ret)
@ -5350,29 +5340,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
/*
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion.
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
/*
* We need to do this in case we fail at _any_ point during the
* actual truncate. Once we do the truncate_setsize we could
* invalidate pages which forces any outstanding ordered io to
* be instantly completed which will give us extents that need
* to be truncated. If we fail to get an orphan inode down we
* could have left over extents that were never meant to live,
* so we need to guarantee from this point on that everything
* will be consistent.
*/
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
btrfs_end_transaction(trans);
if (ret)
return ret;
truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the end less truncate */
@ -5384,29 +5351,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) {
int err;
/* To get a stable disk_i_size */
err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err) {
btrfs_orphan_del(NULL, BTRFS_I(inode));
return err;
}
/*
* failed to truncate, disk_i_size is only adjusted down
* as we remove extents, so it should represent the true
* size of the inode, so reset the in memory size and
* delete our orphan entry.
* Truncate failed, so fix up the in-memory size. We
* adjusted disk_i_size down as we removed extents, so
* wait for disk_i_size to be stable and then update the
* in-memory size to match.
*/
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, BTRFS_I(inode));
return ret;
}
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
err = btrfs_orphan_del(trans, BTRFS_I(inode));
err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err)
btrfs_abort_transaction(trans, err);
btrfs_end_transaction(trans);
return err;
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
}
}
@ -9224,39 +9178,31 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
}
/*
* Yes ladies and gentlemen, this is indeed ugly. The fact is we have
* 3 things going on here
* Yes ladies and gentlemen, this is indeed ugly. We have a couple of
* things going on here:
*
* 1) We need to reserve space for our orphan item and the space to
* delete our orphan item. Lord knows we don't want to have a dangling
* orphan item because we didn't reserve space to remove it.
* 1) We need to reserve space to update our inode.
*
* 2) We need to reserve space to update our inode.
*
* 3) We need to have something to cache all the space that is going to
* 2) We need to have something to cache all the space that is going to
* be free'd up by the truncate operation, but also have some slack
* space reserved in case it uses space during the truncate (thank you
* very much snapshotting).
*
* And we need these to all be separate. The fact is we can use a lot of
* And we need these to be separate. The fact is we can use a lot of
* space doing the truncate, and we have no earthly idea how much space
* we will use, so we need the truncate reservation to be separate so it
* doesn't end up using space reserved for updating the inode or
* removing the orphan item. We also need to be able to stop the
* transaction and start a new one, which means we need to be able to
* update the inode several times, and we have no idea of knowing how
* many times that will be, so we can't just reserve 1 item for the
* entirety of the operation, so that has to be done separately as well.
* Then there is the orphan item, which does indeed need to be held on
* to for the whole operation, and we need nobody to touch this reserved
* space except the orphan code.
* doesn't end up using space reserved for updating the inode. We also
* need to be able to stop the transaction and start a new one, which
* means we need to be able to update the inode several times, and we
* have no idea of knowing how many times that will be, so we can't just
* reserve 1 item for the entirety of the operation, so that has to be
* done separately as well.
*
* So that leaves us with
*
* 1) root->orphan_block_rsv - for the orphan deletion.
* 2) rsv - for the truncate reservation, which we will steal from the
* 1) rsv - for the truncate reservation, which we will steal from the
* transaction reservation.
* 3) fs_info->trans_block_rsv - this will have 1 items worth left for
* 2) fs_info->trans_block_rsv - this will have 1 items worth left for
* updating the inode.
*/
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
@ -9345,13 +9291,6 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
}
if (ret == 0 && inode->i_nlink > 0) {
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
if (ret)
err = ret;
}
if (trans) {
trans->block_rsv = &fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);