for-6.3-rc4-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmQptV0ACgkQxWXV+ddt
WDuZ/g/8CAu7WKhj/aLsYB/xRcOcloeoUZXMhb6NUxZC14ZHrSc9rWMPF7S8T4qK
PwoNfhROdox+laAYX2WcOgo6yZ4Rhd+yDdyqLgQIbc0q3cWfOJ/vzSkeREdNCvNW
qTicdB59Mka0YT+BOC9em29bsxHLpEMKmg1o5tao8LCdc17jPFyPN6BYgxFfeenQ
aetKUyosqllEBxlpJHaLG1+gKZrI2VaCyhrCEw66Mbtri5WbwN3cTJOXqNSkySDB
JKEs3y4yMo3Xiz+UhCaq614EzX1SR15n/WP7ZvjxvlXXJ0iHp4f11zSlUnm2u+jI
JN5lkfBorSRMowgnLWGDn5zQDKXJOk1aAWv5YgqTqpWKg6X/fHxTdt4wdCSZ08m9
dwVWqWN2BD7jS0UT45IPsniwGI9bkLRcNUFNgbFtRD9X52U2ie/PSv9qdz9gsDLW
5FSXv65gD+kWdkpyw7NLRtXO1FPe6wfPm5ZqecEChIQmWUiisOnJwjKlewQUdRsy
zki4wRGxiqKgSlrxrCLs24r9291EwjR9FcBTZLrYRNbCBf32xIGG2CUhPBapx4kB
xgMHCn5NdP/cHPxqzQNeq8z8NI4F648qr6Z2KS03rmWZv9/1xsB39NFS4qLjrOM7
YqpNDtCGVG5HpMWzardbcZ2FdoKj+o1qCCW851y8tDCdimPhSfk=
=v7ZW
-----END PGP SIGNATURE-----

Merge tag 'for-6.3-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - scan block devices in non-exclusive mode to avoid temporary mkfs
   failures

 - fix race between quota disable and quota assign ioctls

 - fix deadlock when aborting transaction during relocation with scrub

 - ignore fiemap path cache when there are multiple paths for a node

* tag 'for-6.3-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: ignore fiemap path cache when there are multiple paths for a node
  btrfs: fix deadlock when aborting transaction during relocation with scrub
  btrfs: scan device in non-exclusive mode
  btrfs: fix race between quota disable and quota assign ioctls
commit 6ab608fe85
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1921,8 +1921,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 	level = -1;
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		bool is_shared;
-		bool cached;
+		const unsigned long prev_ref_count = ctx->refs.nnodes;
 
 		walk_ctx.bytenr = bytenr;
 		ret = find_parent_nodes(&walk_ctx, &shared);
@@ -1940,21 +1939,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 		ret = 0;
 
 		/*
-		 * If our data extent was not directly shared (without multiple
-		 * reference items), than it might have a single reference item
-		 * with a count > 1 for the same offset, which means there are 2
-		 * (or more) file extent items that point to the data extent -
-		 * this happens when a file extent item needs to be split and
-		 * then one item gets moved to another leaf due to a b+tree leaf
-		 * split when inserting some item. In this case the file extent
-		 * items may be located in different leaves and therefore some
-		 * of the leaves may be referenced through shared subtrees while
-		 * others are not. Since our extent buffer cache only works for
-		 * a single path (by far the most common case and simpler to
-		 * deal with), we can not use it if we have multiple leaves
-		 * (which implies multiple paths).
+		 * More than one extent buffer (bytenr) may have been added to
+		 * the ctx->refs ulist, in which case we have to check multiple
+		 * tree paths in case the first one is not shared, so we can not
+		 * use the path cache which is made for a single path. Multiple
+		 * extent buffers at the current level happen when:
+		 *
+		 * 1) level -1, the data extent: If our data extent was not
+		 *    directly shared (without multiple reference items), then
+		 *    it might have a single reference item with a count > 1 for
+		 *    the same offset, which means there are 2 (or more) file
+		 *    extent items that point to the data extent - this happens
+		 *    when a file extent item needs to be split and then one
+		 *    item gets moved to another leaf due to a b+tree leaf split
+		 *    when inserting some item. In this case the file extent
+		 *    items may be located in different leaves and therefore
+		 *    some of the leaves may be referenced through shared
+		 *    subtrees while others are not. Since our extent buffer
+		 *    cache only works for a single path (by far the most common
+		 *    case and simpler to deal with), we can not use it if we
+		 *    have multiple leaves (which implies multiple paths).
+		 *
+		 * 2) level >= 0, a tree node/leaf: We can have a mix of direct
+		 *    and indirect references on a b+tree node/leaf, so we have
+		 *    to check multiple paths, and the extent buffer (the
+		 *    current bytenr) may be shared or not. One example is
+		 *    during relocation as we may get a shared tree block ref
+		 *    (direct ref) and a non-shared tree block ref (indirect
+		 *    ref) for the same node/leaf.
 		 */
-		if (level == -1 && ctx->refs.nnodes > 1)
+		if ((ctx->refs.nnodes - prev_ref_count) > 1)
 			ctx->use_path_cache = false;
 
 		if (level >= 0)
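The core of the fiemap fix is visible above: instead of testing the total ctx->refs.nnodes only at level -1, the walk snapshots the count before resolving each level and looks at how many nodes that single step added. Below is a minimal userspace sketch of that counting pattern; struct walk_state and resolve_one_level are invented names standing in for the btrfs types, not kernel API.

#include <stdbool.h>
#include <stdio.h>

struct walk_state {
	unsigned long nnodes;     /* parent nodes accumulated so far */
	bool use_path_cache;      /* only valid while a single path exists */
};

/* Pretend one backref-resolution step added "added" parent nodes. */
static void resolve_one_level(struct walk_state *ws, unsigned long added)
{
	const unsigned long prev_ref_count = ws->nnodes;

	ws->nnodes += added;
	/* Gaining more than one node in a single step means the walk
	 * forked into multiple tree paths, which a single-path cache
	 * cannot represent. */
	if ((ws->nnodes - prev_ref_count) > 1)
		ws->use_path_cache = false;
}

int main(void)
{
	struct walk_state ws = { .nnodes = 0, .use_path_cache = true };

	resolve_one_level(&ws, 1);  /* one parent: cache stays usable */
	resolve_one_level(&ws, 2);  /* two parents: cache disabled */
	printf("use_path_cache = %d\n", ws.use_path_cache);
	return 0;
}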
@@ -1964,18 +1978,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 		if (!node)
 			break;
 		bytenr = node->val;
-		level++;
-		cached = lookup_backref_shared_cache(ctx, root, bytenr, level,
-						     &is_shared);
-		if (cached) {
-			ret = (is_shared ? 1 : 0);
-			break;
+		if (ctx->use_path_cache) {
+			bool is_shared;
+			bool cached;
+
+			level++;
+			cached = lookup_backref_shared_cache(ctx, root, bytenr,
+							     level, &is_shared);
+			if (cached) {
+				ret = (is_shared ? 1 : 0);
+				break;
+			}
 		}
 		shared.share_count = 0;
 		shared.have_delayed_delete_refs = false;
 		cond_resched();
 	}
 
+	/*
+	 * If the path cache is disabled, then it means at some tree level we
+	 * got multiple parents due to a mix of direct and indirect backrefs or
+	 * multiple leaves with file extent items pointing to the same data
+	 * extent. We have to invalidate the cache and cache only the sharedness
+	 * result for the levels where we got only one node/reference.
+	 */
+	if (!ctx->use_path_cache) {
+		int i = 0;
+
+		level--;
+		if (ret >= 0 && level >= 0) {
+			bytenr = ctx->path_cache_entries[level].bytenr;
+			ctx->use_path_cache = true;
+			store_backref_shared_cache(ctx, root, bytenr, level, ret);
+			i = level + 1;
+		}
+
+		for ( ; i < BTRFS_MAX_LEVEL; i++)
+			ctx->path_cache_entries[i].bytenr = 0;
+	}
+
 	/*
 	 * Cache the sharedness result for the data extent if we know our inode
 	 * has more than 1 file extent item that refers to the data extent.
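When the cache was disabled mid-walk, only the levels that still had a single node keep a trustworthy sharedness result, which is why the block above stores the result for the deepest single-path level and zeroes every entry above it. A small sketch of that partial invalidation, using invented names (cache_entry, invalidate_above, MAX_LEVEL) in place of the btrfs structures:

#include <stdio.h>
#include <string.h>

#define MAX_LEVEL 8   /* stands in for BTRFS_MAX_LEVEL */

struct cache_entry {
	unsigned long long bytenr;
	int is_shared;
};

/* Keep the result for the deepest level that still had a single node
 * (if any) and clear every entry above it. */
static void invalidate_above(struct cache_entry *entries, int level, int result)
{
	int i = 0;

	if (level >= 0) {
		entries[level].is_shared = result;  /* still trustworthy */
		i = level + 1;
	}
	for (; i < MAX_LEVEL; i++)  /* levels where the walk had forked */
		entries[i].bytenr = 0;
}

int main(void)
{
	struct cache_entry entries[MAX_LEVEL];

	memset(entries, 0xff, sizeof(entries));  /* pretend all levels cached */
	invalidate_above(entries, 2, 1);         /* multiple paths above level 2 */
	printf("entry 3 bytenr after invalidation: %llu\n", entries[3].bytenr);
	return 0;
}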
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3732,7 +3732,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 	}
 
 	/* update qgroup status and info */
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	err = btrfs_run_qgroups(trans);
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	if (err < 0)
 		btrfs_handle_fs_error(fs_info, err,
 				      "failed to update qgroup status and info");
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2828,13 +2828,22 @@ cleanup:
 }
 
 /*
- * called from commit_transaction. Writes all changed qgroups to disk.
+ * Writes all changed qgroups to disk.
+ * Called by the transaction commit path and the qgroup assign ioctl.
  */
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 
+	/*
+	 * In case we are called from the qgroup assign ioctl, assert that we
+	 * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
+	 * disable operation (ioctl) and access a freed quota root.
+	 */
+	if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
+		lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
+
 	if (!fs_info->quota_root)
 		return ret;
 
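The two hunks above work as a pair: the ioctl path now takes fs_info->qgroup_ioctl_lock around btrfs_run_qgroups(), and the function itself asserts the lock is held whenever it is not on the transaction commit path. Below is a userspace analogue of that locking contract, with invented names throughout; the kernel's lockdep_assert_held() is approximated with a trylock assertion, which is only valid for non-recursive mutexes.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t qgroup_ioctl_lock = PTHREAD_MUTEX_INITIALIZER;
static int *quota_root;   /* freed by a concurrent "quota disable" */

static void run_qgroups(bool on_commit_path)
{
	/* Analogue of lockdep_assert_held(): a normal (non-recursive)
	 * mutex that is already held must report EBUSY on trylock. */
	if (!on_commit_path)
		assert(pthread_mutex_trylock(&qgroup_ioctl_lock) != 0);

	if (!quota_root)
		return;
	printf("updating qgroup status, root id %d\n", *quota_root);
}

int main(void)
{
	quota_root = malloc(sizeof(*quota_root));
	*quota_root = 5;

	/* The ioctl path takes the lock around the call (the fix),
	 * serializing against a quota disable freeing quota_root. */
	pthread_mutex_lock(&qgroup_ioctl_lock);
	run_qgroups(false);
	pthread_mutex_unlock(&qgroup_ioctl_lock);

	free(quota_root);
	quota_root = NULL;
	return 0;
}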
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
-	btrfs_scrub_cancel(fs_info);
+
+	/*
+	 * If relocation is running, we can't cancel scrub because that will
+	 * result in a deadlock. Before relocating a block group, relocation
+	 * pauses scrub, then starts and commits a transaction before unpausing
+	 * scrub. If the transaction commit is being done by the relocation
+	 * task or triggered by another task and the relocation task is waiting
+	 * for the commit, and we end up here due to an error in the commit
+	 * path, then calling btrfs_scrub_cancel() will deadlock, as we are
+	 * asking for scrub to stop while having it asked to be paused higher
+	 * above in relocation code.
+	 */
+	if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
+		btrfs_scrub_cancel(fs_info);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
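The comment describes a classic pause-versus-stop inversion: scrub is parked by relocation, and the commit error path must not wait for a parked worker to exit. Below is a userspace analogue with all names invented (none of this is btrfs API); it applies the same guard, so the program terminates instead of deadlocking.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool paused;         /* "relocation" pauses the worker */
static atomic_bool cancelled;      /* set by scrub_cancel() */
static atomic_bool reloc_running;  /* analogue of BTRFS_FS_RELOC_RUNNING */

static void *scrub_worker(void *arg)
{
	(void)arg;
	while (!atomic_load(&cancelled)) {
		while (atomic_load(&paused))
			usleep(1000);  /* paused: no exit, no progress */
		usleep(1000);          /* simulated scrub work */
	}
	return NULL;
}

/* Ask the worker to stop and wait until it has actually exited. */
static void scrub_cancel(pthread_t t)
{
	atomic_store(&cancelled, true);
	pthread_join(t, NULL);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, scrub_worker, NULL);
	atomic_store(&reloc_running, true);
	atomic_store(&paused, true);   /* relocation parked the worker */

	/* Commit error path: cancelling now would join a thread stuck
	 * in the pause loop above - the deadlock. The guard skips it. */
	if (!atomic_load(&reloc_running))
		scrub_cancel(t);
	else
		printf("relocation running: skipping scrub cancel\n");

	/* Relocation finishes and unpauses; now cancelling is safe. */
	atomic_store(&reloc_running, false);
	atomic_store(&paused, false);
	scrub_cancel(t);
	return 0;
}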
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	 * So, we need to add a special mount option to scan for
 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
 	 */
-	flags |= FMODE_EXCL;
 
+	/*
+	 * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may
+	 * initiate the device scan which may race with the user's mount
+	 * or mkfs command, resulting in failure.
+	 * Since the device scan is solely for reading purposes, there is
+	 * no need for FMODE_EXCL. Additionally, the devices are read again
+	 * during the mount process. It is ok to get some inconsistent
+	 * values temporarily, as the device paths of the fsid are the only
+	 * required information for assembling the volume.
+	 */
 	bdev = blkdev_get_by_path(path, flags, holder);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
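The failure mode being avoided can be reproduced from userspace: on Linux, opening a block device with O_EXCL (without O_CREAT) takes the same exclusive claim that FMODE_EXCL requests in the kernel, and a concurrent exclusive opener such as mkfs then fails with EBUSY. An illustrative demo follows; /dev/sdX is a placeholder, so point it at a scratch device and run as root.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/dev/sdX";
	/* On Linux, O_EXCL without O_CREAT on a block device requests
	 * an exclusive claim, like the FMODE_EXCL the scan used to set. */
	int fd = open(path, O_RDONLY | O_EXCL);

	if (fd < 0) {
		perror("exclusive open");  /* EBUSY if already claimed */
		return 1;
	}
	printf("holding exclusive claim on %s; a concurrent mkfs or\n"
	       "exclusive scan would now fail with EBUSY\n", path);
	sleep(10);  /* keep the claim briefly to demonstrate the race */
	close(fd);
	return 0;
}

Running this while attempting a mkfs of the same device shows the transient failure that the non-exclusive, read-only scan eliminates.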
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3266,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	btrfs_scrub_pause(fs_info);
 	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
 	btrfs_scrub_continue(fs_info);
-	if (ret)
+	if (ret) {
+		/*
+		 * If we had a transaction abort, stop all running scrubs.
+		 * See transaction.c:cleanup_transaction() why we do it here.
+		 */
+		if (BTRFS_FS_ERROR(fs_info))
+			btrfs_scrub_cancel(fs_info);
 		return ret;
+	}
 
 	block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
 	if (!block_group)