Btrfs: track dirty block groups on their own list
Currently, any time we try to update the block groups on disk we will walk _all_ block groups and check the ->dirty flag to see if it is set. This function can get called several times during a commit. So if you have several terabytes of data you will be a very sad panda, as we will loop through _all_ of the block groups several times, which makes the commit take a while and slows down the rest of the file system operations. This patch introduces a dirty list that block groups are added to when they are dirtied for the first time. Then we simply update any block groups that have been dirtied since the last time we called btrfs_write_dirty_block_groups. This allows us to clean up how we write the free space cache out, so it is much cleaner. Thanks, Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
e7070be198
commit
ce93ec548c
|
@ -1238,7 +1238,6 @@ enum btrfs_disk_cache_state {
|
||||||
BTRFS_DC_ERROR = 1,
|
BTRFS_DC_ERROR = 1,
|
||||||
BTRFS_DC_CLEAR = 2,
|
BTRFS_DC_CLEAR = 2,
|
||||||
BTRFS_DC_SETUP = 3,
|
BTRFS_DC_SETUP = 3,
|
||||||
BTRFS_DC_NEED_WRITE = 4,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btrfs_caching_control {
|
struct btrfs_caching_control {
|
||||||
|
@ -1276,7 +1275,6 @@ struct btrfs_block_group_cache {
|
||||||
unsigned long full_stripe_len;
|
unsigned long full_stripe_len;
|
||||||
|
|
||||||
unsigned int ro:1;
|
unsigned int ro:1;
|
||||||
unsigned int dirty:1;
|
|
||||||
unsigned int iref:1;
|
unsigned int iref:1;
|
||||||
unsigned int has_caching_ctl:1;
|
unsigned int has_caching_ctl:1;
|
||||||
unsigned int removed:1;
|
unsigned int removed:1;
|
||||||
|
@ -1314,6 +1312,9 @@ struct btrfs_block_group_cache {
|
||||||
struct list_head ro_list;
|
struct list_head ro_list;
|
||||||
|
|
||||||
atomic_t trimming;
|
atomic_t trimming;
|
||||||
|
|
||||||
|
/* For dirty block groups */
|
||||||
|
struct list_head dirty_list;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* delayed seq elem */
|
/* delayed seq elem */
|
||||||
|
|
|
@ -74,8 +74,9 @@ enum {
|
||||||
RESERVE_ALLOC_NO_ACCOUNT = 2,
|
RESERVE_ALLOC_NO_ACCOUNT = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int update_block_group(struct btrfs_root *root,
|
static int update_block_group(struct btrfs_trans_handle *trans,
|
||||||
u64 bytenr, u64 num_bytes, int alloc);
|
struct btrfs_root *root, u64 bytenr,
|
||||||
|
u64 num_bytes, int alloc);
|
||||||
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
u64 bytenr, u64 num_bytes, u64 parent,
|
u64 bytenr, u64 num_bytes, u64 parent,
|
||||||
|
@ -3315,120 +3316,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root)
|
struct btrfs_root *root)
|
||||||
{
|
{
|
||||||
struct btrfs_block_group_cache *cache;
|
struct btrfs_block_group_cache *cache;
|
||||||
int err = 0;
|
struct btrfs_transaction *cur_trans = trans->transaction;
|
||||||
|
int ret = 0;
|
||||||
struct btrfs_path *path;
|
struct btrfs_path *path;
|
||||||
u64 last = 0;
|
|
||||||
|
if (list_empty(&cur_trans->dirty_bgs))
|
||||||
|
return 0;
|
||||||
|
|
||||||
path = btrfs_alloc_path();
|
path = btrfs_alloc_path();
|
||||||
if (!path)
|
if (!path)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
again:
|
/*
|
||||||
while (1) {
|
* We don't need the lock here since we are protected by the transaction
|
||||||
cache = btrfs_lookup_first_block_group(root->fs_info, last);
|
* commit. We want to do the cache_save_setup first and then run the
|
||||||
while (cache) {
|
* delayed refs to make sure we have the best chance at doing this all
|
||||||
if (cache->disk_cache_state == BTRFS_DC_CLEAR)
|
* in one shot.
|
||||||
break;
|
*/
|
||||||
cache = next_block_group(root, cache);
|
while (!list_empty(&cur_trans->dirty_bgs)) {
|
||||||
}
|
cache = list_first_entry(&cur_trans->dirty_bgs,
|
||||||
if (!cache) {
|
struct btrfs_block_group_cache,
|
||||||
if (last == 0)
|
dirty_list);
|
||||||
break;
|
list_del_init(&cache->dirty_list);
|
||||||
last = 0;
|
if (cache->disk_cache_state == BTRFS_DC_CLEAR)
|
||||||
continue;
|
cache_save_setup(cache, trans, path);
|
||||||
}
|
if (!ret)
|
||||||
err = cache_save_setup(cache, trans, path);
|
ret = btrfs_run_delayed_refs(trans, root,
|
||||||
last = cache->key.objectid + cache->key.offset;
|
(unsigned long) -1);
|
||||||
|
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
|
||||||
|
btrfs_write_out_cache(root, trans, cache, path);
|
||||||
|
if (!ret)
|
||||||
|
ret = write_one_cache_group(trans, root, path, cache);
|
||||||
btrfs_put_block_group(cache);
|
btrfs_put_block_group(cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (1) {
|
|
||||||
if (last == 0) {
|
|
||||||
err = btrfs_run_delayed_refs(trans, root,
|
|
||||||
(unsigned long)-1);
|
|
||||||
if (err) /* File system offline */
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
cache = btrfs_lookup_first_block_group(root->fs_info, last);
|
|
||||||
while (cache) {
|
|
||||||
if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
|
|
||||||
btrfs_put_block_group(cache);
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cache->dirty)
|
|
||||||
break;
|
|
||||||
cache = next_block_group(root, cache);
|
|
||||||
}
|
|
||||||
if (!cache) {
|
|
||||||
if (last == 0)
|
|
||||||
break;
|
|
||||||
last = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cache->disk_cache_state == BTRFS_DC_SETUP)
|
|
||||||
cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
|
|
||||||
cache->dirty = 0;
|
|
||||||
last = cache->key.objectid + cache->key.offset;
|
|
||||||
|
|
||||||
err = write_one_cache_group(trans, root, path, cache);
|
|
||||||
btrfs_put_block_group(cache);
|
|
||||||
if (err) /* File system offline */
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
/*
|
|
||||||
* I don't think this is needed since we're just marking our
|
|
||||||
* preallocated extent as written, but just in case it can't
|
|
||||||
* hurt.
|
|
||||||
*/
|
|
||||||
if (last == 0) {
|
|
||||||
err = btrfs_run_delayed_refs(trans, root,
|
|
||||||
(unsigned long)-1);
|
|
||||||
if (err) /* File system offline */
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
cache = btrfs_lookup_first_block_group(root->fs_info, last);
|
|
||||||
while (cache) {
|
|
||||||
/*
|
|
||||||
* Really this shouldn't happen, but it could if we
|
|
||||||
* couldn't write the entire preallocated extent and
|
|
||||||
* splitting the extent resulted in a new block.
|
|
||||||
*/
|
|
||||||
if (cache->dirty) {
|
|
||||||
btrfs_put_block_group(cache);
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
|
|
||||||
break;
|
|
||||||
cache = next_block_group(root, cache);
|
|
||||||
}
|
|
||||||
if (!cache) {
|
|
||||||
if (last == 0)
|
|
||||||
break;
|
|
||||||
last = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = btrfs_write_out_cache(root, trans, cache, path);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we didn't have an error then the cache state is still
|
|
||||||
* NEED_WRITE, so we can set it to WRITTEN.
|
|
||||||
*/
|
|
||||||
if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
|
|
||||||
cache->disk_cache_state = BTRFS_DC_WRITTEN;
|
|
||||||
last = cache->key.objectid + cache->key.offset;
|
|
||||||
btrfs_put_block_group(cache);
|
|
||||||
}
|
|
||||||
out:
|
|
||||||
|
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
return err;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
|
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
|
||||||
|
@ -5375,8 +5298,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
|
||||||
btrfs_free_reserved_data_space(inode, num_bytes);
|
btrfs_free_reserved_data_space(inode, num_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int update_block_group(struct btrfs_root *root,
|
static int update_block_group(struct btrfs_trans_handle *trans,
|
||||||
u64 bytenr, u64 num_bytes, int alloc)
|
struct btrfs_root *root, u64 bytenr,
|
||||||
|
u64 num_bytes, int alloc)
|
||||||
{
|
{
|
||||||
struct btrfs_block_group_cache *cache = NULL;
|
struct btrfs_block_group_cache *cache = NULL;
|
||||||
struct btrfs_fs_info *info = root->fs_info;
|
struct btrfs_fs_info *info = root->fs_info;
|
||||||
|
@ -5414,6 +5338,14 @@ static int update_block_group(struct btrfs_root *root,
|
||||||
if (!alloc && cache->cached == BTRFS_CACHE_NO)
|
if (!alloc && cache->cached == BTRFS_CACHE_NO)
|
||||||
cache_block_group(cache, 1);
|
cache_block_group(cache, 1);
|
||||||
|
|
||||||
|
spin_lock(&trans->transaction->dirty_bgs_lock);
|
||||||
|
if (list_empty(&cache->dirty_list)) {
|
||||||
|
list_add_tail(&cache->dirty_list,
|
||||||
|
&trans->transaction->dirty_bgs);
|
||||||
|
btrfs_get_block_group(cache);
|
||||||
|
}
|
||||||
|
spin_unlock(&trans->transaction->dirty_bgs_lock);
|
||||||
|
|
||||||
byte_in_group = bytenr - cache->key.objectid;
|
byte_in_group = bytenr - cache->key.objectid;
|
||||||
WARN_ON(byte_in_group > cache->key.offset);
|
WARN_ON(byte_in_group > cache->key.offset);
|
||||||
|
|
||||||
|
@ -5424,7 +5356,6 @@ static int update_block_group(struct btrfs_root *root,
|
||||||
cache->disk_cache_state < BTRFS_DC_CLEAR)
|
cache->disk_cache_state < BTRFS_DC_CLEAR)
|
||||||
cache->disk_cache_state = BTRFS_DC_CLEAR;
|
cache->disk_cache_state = BTRFS_DC_CLEAR;
|
||||||
|
|
||||||
cache->dirty = 1;
|
|
||||||
old_val = btrfs_block_group_used(&cache->item);
|
old_val = btrfs_block_group_used(&cache->item);
|
||||||
num_bytes = min(total, cache->key.offset - byte_in_group);
|
num_bytes = min(total, cache->key.offset - byte_in_group);
|
||||||
if (alloc) {
|
if (alloc) {
|
||||||
|
@ -6103,7 +6034,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = update_block_group(root, bytenr, num_bytes, 0);
|
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
btrfs_abort_transaction(trans, extent_root, ret);
|
btrfs_abort_transaction(trans, extent_root, ret);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -7063,7 +6994,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = update_block_group(root, ins->objectid, ins->offset, 1);
|
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
|
||||||
if (ret) { /* -ENOENT, logic error */
|
if (ret) { /* -ENOENT, logic error */
|
||||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||||
ins->objectid, ins->offset);
|
ins->objectid, ins->offset);
|
||||||
|
@ -7152,7 +7083,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = update_block_group(root, ins->objectid, root->nodesize, 1);
|
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
|
||||||
|
1);
|
||||||
if (ret) { /* -ENOENT, logic error */
|
if (ret) { /* -ENOENT, logic error */
|
||||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||||
ins->objectid, ins->offset);
|
ins->objectid, ins->offset);
|
||||||
|
@ -9005,6 +8937,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
|
||||||
INIT_LIST_HEAD(&cache->cluster_list);
|
INIT_LIST_HEAD(&cache->cluster_list);
|
||||||
INIT_LIST_HEAD(&cache->bg_list);
|
INIT_LIST_HEAD(&cache->bg_list);
|
||||||
INIT_LIST_HEAD(&cache->ro_list);
|
INIT_LIST_HEAD(&cache->ro_list);
|
||||||
|
INIT_LIST_HEAD(&cache->dirty_list);
|
||||||
btrfs_init_free_space_ctl(cache);
|
btrfs_init_free_space_ctl(cache);
|
||||||
atomic_set(&cache->trimming, 0);
|
atomic_set(&cache->trimming, 0);
|
||||||
|
|
||||||
|
@ -9068,9 +9001,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
|
||||||
* b) Setting 'dirty flag' makes sure that we flush
|
* b) Setting 'dirty flag' makes sure that we flush
|
||||||
* the new space cache info onto disk.
|
* the new space cache info onto disk.
|
||||||
*/
|
*/
|
||||||
cache->disk_cache_state = BTRFS_DC_CLEAR;
|
|
||||||
if (btrfs_test_opt(root, SPACE_CACHE))
|
if (btrfs_test_opt(root, SPACE_CACHE))
|
||||||
cache->dirty = 1;
|
cache->disk_cache_state = BTRFS_DC_CLEAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
read_extent_buffer(leaf, &cache->item,
|
read_extent_buffer(leaf, &cache->item,
|
||||||
|
@ -9461,6 +9393,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock(&trans->transaction->dirty_bgs_lock);
|
||||||
|
if (!list_empty(&block_group->dirty_list)) {
|
||||||
|
list_del_init(&block_group->dirty_list);
|
||||||
|
btrfs_put_block_group(block_group);
|
||||||
|
}
|
||||||
|
spin_unlock(&trans->transaction->dirty_bgs_lock);
|
||||||
|
|
||||||
btrfs_remove_free_space_cache(block_group);
|
btrfs_remove_free_space_cache(block_group);
|
||||||
|
|
||||||
spin_lock(&block_group->space_info->lock);
|
spin_lock(&block_group->space_info->lock);
|
||||||
|
|
|
@ -1243,6 +1243,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
|
||||||
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
|
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
|
||||||
|
|
||||||
root = root->fs_info->tree_root;
|
root = root->fs_info->tree_root;
|
||||||
|
|
||||||
|
@ -1266,9 +1267,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
|
||||||
ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
|
ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
|
||||||
path, block_group->key.objectid);
|
path, block_group->key.objectid);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
spin_lock(&block_group->lock);
|
dcs = BTRFS_DC_ERROR;
|
||||||
block_group->disk_cache_state = BTRFS_DC_ERROR;
|
|
||||||
spin_unlock(&block_group->lock);
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
btrfs_err(root->fs_info,
|
btrfs_err(root->fs_info,
|
||||||
|
@ -1277,6 +1276,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock(&block_group->lock);
|
||||||
|
block_group->disk_cache_state = dcs;
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
iput(inode);
|
iput(inode);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -248,6 +248,8 @@ loop:
|
||||||
INIT_LIST_HEAD(&cur_trans->pending_chunks);
|
INIT_LIST_HEAD(&cur_trans->pending_chunks);
|
||||||
INIT_LIST_HEAD(&cur_trans->switch_commits);
|
INIT_LIST_HEAD(&cur_trans->switch_commits);
|
||||||
INIT_LIST_HEAD(&cur_trans->pending_ordered);
|
INIT_LIST_HEAD(&cur_trans->pending_ordered);
|
||||||
|
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
|
||||||
|
spin_lock_init(&cur_trans->dirty_bgs_lock);
|
||||||
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
||||||
extent_io_tree_init(&cur_trans->dirty_pages,
|
extent_io_tree_init(&cur_trans->dirty_pages,
|
||||||
fs_info->btree_inode->i_mapping);
|
fs_info->btree_inode->i_mapping);
|
||||||
|
@ -1028,7 +1030,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
||||||
while (1) {
|
while (1) {
|
||||||
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
|
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
|
||||||
if (old_root_bytenr == root->node->start &&
|
if (old_root_bytenr == root->node->start &&
|
||||||
old_root_used == btrfs_root_used(&root->root_item))
|
old_root_used == btrfs_root_used(&root->root_item) &&
|
||||||
|
(!extent_root ||
|
||||||
|
list_empty(&trans->transaction->dirty_bgs)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
btrfs_set_root_node(&root->root_item, root->node);
|
btrfs_set_root_node(&root->root_item, root->node);
|
||||||
|
@ -1047,6 +1051,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
|
||||||
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
|
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1067,10 +1074,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
eb = btrfs_lock_root_node(fs_info->tree_root);
|
eb = btrfs_lock_root_node(fs_info->tree_root);
|
||||||
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
|
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
|
||||||
0, &eb);
|
0, &eb);
|
||||||
|
@ -1990,6 +1993,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||||
switch_commit_roots(cur_trans, root->fs_info);
|
switch_commit_roots(cur_trans, root->fs_info);
|
||||||
|
|
||||||
assert_qgroups_uptodate(trans);
|
assert_qgroups_uptodate(trans);
|
||||||
|
ASSERT(list_empty(&cur_trans->dirty_bgs));
|
||||||
update_super_roots(root);
|
update_super_roots(root);
|
||||||
|
|
||||||
btrfs_set_super_log_root(root->fs_info->super_copy, 0);
|
btrfs_set_super_log_root(root->fs_info->super_copy, 0);
|
||||||
|
|
|
@ -58,6 +58,8 @@ struct btrfs_transaction {
|
||||||
struct list_head pending_chunks;
|
struct list_head pending_chunks;
|
||||||
struct list_head pending_ordered;
|
struct list_head pending_ordered;
|
||||||
struct list_head switch_commits;
|
struct list_head switch_commits;
|
||||||
|
struct list_head dirty_bgs;
|
||||||
|
spinlock_t dirty_bgs_lock;
|
||||||
struct btrfs_delayed_ref_root delayed_refs;
|
struct btrfs_delayed_ref_root delayed_refs;
|
||||||
int aborted;
|
int aborted;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue