From 75cb379d2635215ad2c67750693f7dc45ad19a5f Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Mar 2018 15:25:26 -0400 Subject: [PATCH] btrfs: defer adding raid type kobject until after chunk relocation Any time the first block group of a new type is created, we add a new kobject to sysfs to hold the attributes for that type. Kobject-internal allocations always use GFP_KERNEL, making them prone to fs-reclaim races. While it appears as if this can occur any time a block group is created, the only times the first block group of a new type can be created in memory is at mount and when we create the first new block group during raid conversion. This patch adds a new list to track pending kobject additions and then handles them after we do chunk relocation. Between relocating the target chunk (or forcing allocation of a new chunk in the case of data) and removing the old chunk, we're in a safe place for fs-reclaim to occur. We're holding the volume mutex, which is already held across page faults, and the delete_unused_bgs_mutex, which will only stall the cleaner thread. Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ++++- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 60 +++++++++++++++++++++++++++++------------- fs/btrfs/sysfs.c | 2 +- fs/btrfs/volumes.c | 12 +++++++++ 5 files changed, 62 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ffa72ca5755d..8d3aa56b928b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -385,8 +385,9 @@ struct btrfs_dev_replace { /* For raid type sysfs entries */ struct raid_kobject { - int raid_type; + u64 flags; struct kobject kobj; + struct list_head list; }; struct btrfs_space_info { @@ -940,6 +941,8 @@ struct btrfs_fs_info { u32 thread_pool_size; struct kobject *space_info_kobj; + struct list_head pending_raid_kobjs; + spinlock_t pending_raid_kobjs_lock; /* uncontended */ u64 total_pinned; @@ -2700,6 +2703,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, u64 chunk_offset, u64 size); +void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1657d6aa4fa6..38b387ae78f8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2431,6 +2431,8 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); + INIT_LIST_HEAD(&fs_info->pending_raid_kobjs); + spin_lock_init(&fs_info->pending_raid_kobjs_lock); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e10f74dac493..0b1f01dd02de 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9918,9 +9918,39 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +/* link_block_group will queue up kobjects to add when we're reclaim-safe */ +void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + struct raid_kobject *rkobj; + LIST_HEAD(list); + int index; + int ret = 0; + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_splice_init(&fs_info->pending_raid_kobjs, &list); + spin_unlock(&fs_info->pending_raid_kobjs_lock); + + list_for_each_entry(rkobj, &list, list) { + space_info = __find_space_info(fs_info, rkobj->flags); + index = btrfs_bg_flags_to_raid_index(rkobj->flags); + + ret = kobject_add(&rkobj->kobj, &space_info->kobj, + "%s", get_raid_name(index)); + if (ret) { + kobject_put(&rkobj->kobj); + break; + } + } + if (ret) + btrfs_warn(fs_info, + "failed to add kobject for block cache, ignoring"); +} + static void link_block_group(struct btrfs_block_group_cache *cache) { struct btrfs_space_info *space_info = cache->space_info; + struct btrfs_fs_info *fs_info = cache->fs_info; int index = btrfs_bg_flags_to_raid_index(cache->flags); bool first = false; @@ -9931,27 +9961,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache) up_write(&space_info->groups_sem); if (first) { - struct raid_kobject *rkobj; - int ret; - - rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); - if (!rkobj) - goto out_err; - rkobj->raid_type = index; - kobject_init(&rkobj->kobj, &btrfs_raid_ktype); - ret = kobject_add(&rkobj->kobj, &space_info->kobj, - "%s", get_raid_name(index)); - if (ret) { - kobject_put(&rkobj->kobj); - goto out_err; + struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); + if (!rkobj) { + btrfs_warn(cache->fs_info, + "couldn't alloc memory for raid level kobject"); + return; } + rkobj->flags = cache->flags; + kobject_init(&rkobj->kobj, &btrfs_raid_ktype); + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); + spin_unlock(&fs_info->pending_raid_kobjs_lock); space_info->block_group_kobjs[index] = &rkobj->kobj; } - - return; -out_err: - btrfs_warn(cache->fs_info, - "failed to add kobject for block cache, ignoring"); } static struct btrfs_block_group_cache * @@ -10167,6 +10190,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) inc_block_group_ro(cache, 1); } + btrfs_add_raid_kobjects(info); init_global_block_rsv(info); ret = 0; error: diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 6af7b58e1a90..ca067471cd46 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, { struct btrfs_space_info *sinfo = to_space_info(kobj->parent); struct btrfs_block_group_cache *block_group; - int index = to_raid_kobj(kobj)->raid_type; + int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags); u64 val = 0; down_read(&sinfo->groups_sem); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73de042158f1..4fc6acf65220 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3003,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) if (ret) return ret; + /* + * We add the kobjects here (and after forcing data chunk creation) + * since relocation is the only place we'll create chunks of a new + * type at runtime. The only place where we'll remove the last + * chunk of a type is the call immediately below this one. Even + * so, we're protected against races with the cleaner thread since + * we're covered by the delete_unused_bgs_mutex. + */ + btrfs_add_raid_kobjects(fs_info); + trans = btrfs_start_trans_remove_block_group(root->fs_info, chunk_offset); if (IS_ERR(trans)) { @@ -3130,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, if (ret < 0) return ret; + btrfs_add_raid_kobjects(fs_info); + return 1; } }