for-5.0-rc4-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlxWtJgACgkQxWXV+ddt WDsDow//ZpnyDwQWvSIfF2UUQOPlcBjbHKuBA7rU0wdybW635QYGR0mqnI+1VnMj 7ssUkeN6N0a2gQzrUG4Y+zpdzOWv2xQ4jKZ9GMOp9gwyzEFyPkcFXOnmM8UfYtVu e3fK65e8BZHmTeu0kGKah4Dt1g0t4fUmhsKR4Pfp5YNJC+zuuGTwUW1K/ZQHXJ+3 kTHc7WP1lsF7wgaZ+Gl+Kvp8fVrHVdygMVTdRBW8QaBgPLa/KExvK62jW+NmCYhj 7OIkWdew7e8IXc3Ie5IbOomHAv7IgqqgiO9VO9+n0EpyV4UobUgxrgBKJ+0yc1Ya eidbKhMslwUE50y00JVm+vw0gwQHkR+hZDn/mRB6xiIeI8tu/yQIJZ6AhYJXoByR cu8+SNO5Z5dOZ1f146ZH8lnkr24tuSnkDUhbRDR5pdb4tAHHej2ALzhbfbwbPEpF IverYLw5fOMGeRU/mBsjkVadpSZ4S0HVNU85ERdhLtVLK1PSaY2UkUaA+Ii5y7au EYDjaGMflmJ8cAVqgtgedEff2n8OKDnzRZlz4IPLI73MVSITGZkM7PmYmYsLm2Bs mDPnmyqR8kzcd1RRtSeZTvqOpAIZG+QUOmD2jiKrchmp54Sz0V/HJWRs3aQybD6Q ph0yAcbkvgp/ewe5IFgaI0kcyH7zYdL6GtiI2WUE3/8DObgrgsA= =E2PP -----END PGP SIGNATURE-----

Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- regression fix: transaction commit can run away due to delayed ref waiting heuristic, this is not necessary now because of the proper reservation mechanism introduced in 5.0

- regression fix: potential crash due to use-before-check of an ERR_PTR return value

- fix for transaction abort during transaction commit that needs to properly clean up pending block groups

- fix deadlock during b-tree node/leaf splitting, when this happens on some of the fundamental trees, we must prevent new tree block allocation to re-enter indirectly via the block group flushing path

- potential memory leak after errors during mount

* tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: On error always free subvol_name in btrfs_mount
  btrfs: clean up pending block groups when transaction commit aborts
  btrfs: fix potential oops in device_list_add
  btrfs: don't end the transaction for delayed refs in throttle
  Btrfs: fix deadlock when allocating tree block during leaf/node split
This commit is contained in:
commit
312b3a93dd
|
@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct extent_buffer *alloc_tree_block_no_bg_flush(
|
||||||
|
struct btrfs_trans_handle *trans,
|
||||||
|
struct btrfs_root *root,
|
||||||
|
u64 parent_start,
|
||||||
|
const struct btrfs_disk_key *disk_key,
|
||||||
|
int level,
|
||||||
|
u64 hint,
|
||||||
|
u64 empty_size)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||||
|
struct extent_buffer *ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are COWing a node/leaf from the extent, chunk, device or free
|
||||||
|
* space trees, make sure that we do not finish block group creation of
|
||||||
|
* pending block groups. We do this to avoid a deadlock.
|
||||||
|
* COWing can result in allocation of a new chunk, and flushing pending
|
||||||
|
* block groups (btrfs_create_pending_block_groups()) can be triggered
|
||||||
|
* when finishing allocation of a new chunk. Creation of a pending block
|
||||||
|
* group modifies the extent, chunk, device and free space trees,
|
||||||
|
* therefore we could deadlock with ourselves since we are holding a
|
||||||
|
* lock on an extent buffer that btrfs_create_pending_block_groups() may
|
||||||
|
* try to COW later.
|
||||||
|
* For similar reasons, we also need to delay flushing pending block
|
||||||
|
* groups when splitting a leaf or node, from one of those trees, since
|
||||||
|
* we are holding a write lock on it and its parent or when inserting a
|
||||||
|
* new root node for one of those trees.
|
||||||
|
*/
|
||||||
|
if (root == fs_info->extent_root ||
|
||||||
|
root == fs_info->chunk_root ||
|
||||||
|
root == fs_info->dev_root ||
|
||||||
|
root == fs_info->free_space_root)
|
||||||
|
trans->can_flush_pending_bgs = false;
|
||||||
|
|
||||||
|
ret = btrfs_alloc_tree_block(trans, root, parent_start,
|
||||||
|
root->root_key.objectid, disk_key, level,
|
||||||
|
hint, empty_size);
|
||||||
|
trans->can_flush_pending_bgs = true;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* does the dirty work in cow of a single block. The parent block (if
|
* does the dirty work in cow of a single block. The parent block (if
|
||||||
* supplied) is updated to point to the new cow copy. The new buffer is marked
|
* supplied) is updated to point to the new cow copy. The new buffer is marked
|
||||||
|
@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
|
if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
|
||||||
parent_start = parent->start;
|
parent_start = parent->start;
|
||||||
|
|
||||||
/*
|
cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
|
||||||
* If we are COWing a node/leaf from the extent, chunk, device or free
|
level, search_start, empty_size);
|
||||||
* space trees, make sure that we do not finish block group creation of
|
|
||||||
* pending block groups. We do this to avoid a deadlock.
|
|
||||||
* COWing can result in allocation of a new chunk, and flushing pending
|
|
||||||
* block groups (btrfs_create_pending_block_groups()) can be triggered
|
|
||||||
* when finishing allocation of a new chunk. Creation of a pending block
|
|
||||||
* group modifies the extent, chunk, device and free space trees,
|
|
||||||
* therefore we could deadlock with ourselves since we are holding a
|
|
||||||
* lock on an extent buffer that btrfs_create_pending_block_groups() may
|
|
||||||
* try to COW later.
|
|
||||||
*/
|
|
||||||
if (root == fs_info->extent_root ||
|
|
||||||
root == fs_info->chunk_root ||
|
|
||||||
root == fs_info->dev_root ||
|
|
||||||
root == fs_info->free_space_root)
|
|
||||||
trans->can_flush_pending_bgs = false;
|
|
||||||
|
|
||||||
cow = btrfs_alloc_tree_block(trans, root, parent_start,
|
|
||||||
root->root_key.objectid, &disk_key, level,
|
|
||||||
search_start, empty_size);
|
|
||||||
trans->can_flush_pending_bgs = true;
|
|
||||||
if (IS_ERR(cow))
|
if (IS_ERR(cow))
|
||||||
return PTR_ERR(cow);
|
return PTR_ERR(cow);
|
||||||
|
|
||||||
|
@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
|
||||||
else
|
else
|
||||||
btrfs_node_key(lower, &lower_key, 0);
|
btrfs_node_key(lower, &lower_key, 0);
|
||||||
|
|
||||||
c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
|
c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
|
||||||
&lower_key, level, root->node->start, 0);
|
root->node->start, 0);
|
||||||
if (IS_ERR(c))
|
if (IS_ERR(c))
|
||||||
return PTR_ERR(c);
|
return PTR_ERR(c);
|
||||||
|
|
||||||
|
@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
|
||||||
mid = (c_nritems + 1) / 2;
|
mid = (c_nritems + 1) / 2;
|
||||||
btrfs_node_key(c, &disk_key, mid);
|
btrfs_node_key(c, &disk_key, mid);
|
||||||
|
|
||||||
split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
|
split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
|
||||||
&disk_key, level, c->start, 0);
|
c->start, 0);
|
||||||
if (IS_ERR(split))
|
if (IS_ERR(split))
|
||||||
return PTR_ERR(split);
|
return PTR_ERR(split);
|
||||||
|
|
||||||
|
@ -4260,8 +4282,8 @@ again:
|
||||||
else
|
else
|
||||||
btrfs_item_key(l, &disk_key, mid);
|
btrfs_item_key(l, &disk_key, mid);
|
||||||
|
|
||||||
right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
|
right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
|
||||||
&disk_key, 0, l->start, 0);
|
l->start, 0);
|
||||||
if (IS_ERR(right))
|
if (IS_ERR(right))
|
||||||
return PTR_ERR(right);
|
return PTR_ERR(right);
|
||||||
|
|
||||||
|
|
|
@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
|
||||||
flags | SB_RDONLY, device_name, data);
|
flags | SB_RDONLY, device_name, data);
|
||||||
if (IS_ERR(mnt_root)) {
|
if (IS_ERR(mnt_root)) {
|
||||||
root = ERR_CAST(mnt_root);
|
root = ERR_CAST(mnt_root);
|
||||||
|
kfree(subvol_name);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
|
||||||
if (error < 0) {
|
if (error < 0) {
|
||||||
root = ERR_PTR(error);
|
root = ERR_PTR(error);
|
||||||
mntput(mnt_root);
|
mntput(mnt_root);
|
||||||
|
kfree(subvol_name);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (IS_ERR(mnt_root)) {
|
if (IS_ERR(mnt_root)) {
|
||||||
root = ERR_CAST(mnt_root);
|
root = ERR_CAST(mnt_root);
|
||||||
|
kfree(subvol_name);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
btrfs_trans_release_chunk_metadata(trans);
|
btrfs_trans_release_chunk_metadata(trans);
|
||||||
|
|
||||||
if (lock && should_end_transaction(trans) &&
|
|
||||||
READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
|
|
||||||
spin_lock(&info->trans_lock);
|
|
||||||
if (cur_trans->state == TRANS_STATE_RUNNING)
|
|
||||||
cur_trans->state = TRANS_STATE_BLOCKED;
|
|
||||||
spin_unlock(&info->trans_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
|
if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
|
||||||
if (throttle)
|
if (throttle)
|
||||||
return btrfs_commit_transaction(trans);
|
return btrfs_commit_transaction(trans);
|
||||||
|
@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
|
||||||
kmem_cache_free(btrfs_trans_handle_cachep, trans);
|
kmem_cache_free(btrfs_trans_handle_cachep, trans);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release reserved delayed ref space of all pending block groups of the
|
||||||
|
* transaction and remove them from the list
|
||||||
|
*/
|
||||||
|
static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||||
|
struct btrfs_block_group_cache *block_group, *tmp;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
|
||||||
|
btrfs_delayed_refs_rsv_release(fs_info, 1);
|
||||||
|
list_del_init(&block_group->bg_list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -2270,6 +2277,7 @@ scrub_continue:
|
||||||
btrfs_scrub_continue(fs_info);
|
btrfs_scrub_continue(fs_info);
|
||||||
cleanup_transaction:
|
cleanup_transaction:
|
||||||
btrfs_trans_release_metadata(trans);
|
btrfs_trans_release_metadata(trans);
|
||||||
|
btrfs_cleanup_pending_block_groups(trans);
|
||||||
btrfs_trans_release_chunk_metadata(trans);
|
btrfs_trans_release_chunk_metadata(trans);
|
||||||
trans->block_rsv = NULL;
|
trans->block_rsv = NULL;
|
||||||
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
|
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
|
||||||
|
|
|
@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||||
else
|
else
|
||||||
fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
|
fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
|
||||||
|
|
||||||
fs_devices->fsid_change = fsid_change_in_progress;
|
|
||||||
|
|
||||||
if (IS_ERR(fs_devices))
|
if (IS_ERR(fs_devices))
|
||||||
return ERR_CAST(fs_devices);
|
return ERR_CAST(fs_devices);
|
||||||
|
|
||||||
|
fs_devices->fsid_change = fsid_change_in_progress;
|
||||||
|
|
||||||
mutex_lock(&fs_devices->device_list_mutex);
|
mutex_lock(&fs_devices->device_list_mutex);
|
||||||
list_add(&fs_devices->fs_list, &fs_uuids);
|
list_add(&fs_devices->fs_list, &fs_uuids);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue