diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8e539fe5544..4c476281b66b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) +#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) + /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. @@ -1448,10 +1450,9 @@ struct btrfs_fs_info { u64 total_pinned; - /* protected by the delalloc lock, used to keep from writing - * metadata until there is a nice batch - */ - u64 dirty_metadata_bytes; + /* used to keep from writing metadata until there is a nice batch */ + struct percpu_counter dirty_metadata_bytes; + s32 dirty_metadata_batch; struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 04f98e3ffd90..34ace168eebc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -946,18 +946,20 @@ static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct extent_io_tree *tree; + struct btrfs_fs_info *fs_info; + int ret; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { - struct btrfs_root *root = BTRFS_I(mapping->host)->root; - u64 num_dirty; - unsigned long thresh = 32 * 1024 * 1024; if (wbc->for_kupdate) return 0; + fs_info = BTRFS_I(mapping->host)->root->fs_info; /* this is a bit racy, but that's ok */ - num_dirty = root->fs_info->dirty_metadata_bytes; - if (num_dirty < thresh) + ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH); + if (ret < 0) return 0; } return btree_write_cache_pages(mapping, wbc); @@ -1125,24 +1127,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { + struct btrfs_fs_info *fs_info = root->fs_info; + if (btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) { + fs_info->running_transaction->transid) { btrfs_assert_tree_locked(buf); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { - spin_lock(&root->fs_info->delalloc_lock); - if (root->fs_info->dirty_metadata_bytes >= buf->len) - root->fs_info->dirty_metadata_bytes -= buf->len; - else { - spin_unlock(&root->fs_info->delalloc_lock); - btrfs_panic(root->fs_info, -EOVERFLOW, - "Can't clear %lu bytes from " - " dirty_mdatadata_bytes (%llu)", - buf->len, - root->fs_info->dirty_metadata_bytes); - } - spin_unlock(&root->fs_info->delalloc_lock); - + __percpu_counter_add(&fs_info->dirty_metadata_bytes, + -buf->len, + fs_info->dirty_metadata_batch); /* ugh, clear_extent_buffer_dirty needs to lock the page */ btrfs_set_lock_blocking(buf); clear_extent_buffer_dirty(buf); @@ -2008,10 +2002,18 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + if (ret) { + err = ret; + goto fail_bdi; + } + fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * + (1 + ilog2(nr_cpu_ids)); + fs_info->btree_inode = new_inode(sb); if (!fs_info->btree_inode) { err = -ENOMEM; - goto fail_bdi; + goto fail_dirty_metadata_bytes; } mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -2266,6 +2268,7 @@ int open_ctree(struct super_block *sb, leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = btrfs_super_stripesize(disk_super); + fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); /* * mixed block groups end up with duplicate but slightly offset @@ -2728,6 +2731,8 @@ fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); +fail_dirty_metadata_bytes: + percpu_counter_destroy(&fs_info->dirty_metadata_bytes); fail_bdi: bdi_destroy(&fs_info->bdi); fail_srcu: @@ -3406,6 +3411,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + percpu_counter_destroy(&fs_info->dirty_metadata_bytes); bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); @@ -3448,11 +3454,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) (unsigned long long)transid, (unsigned long long)root->fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); - if (!was_dirty) { - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->dirty_metadata_bytes += buf->len; - spin_unlock(&root->fs_info->delalloc_lock); - } + if (!was_dirty) + __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, + buf->len, + root->fs_info->dirty_metadata_batch); } static void __btrfs_btree_balance_dirty(struct btrfs_root *root, @@ -3462,8 +3467,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, * looks as though older kernels can get into trouble with * this code, they end up stuck in balance_dirty_pages forever */ - u64 num_dirty; - unsigned long thresh = 32 * 1024 * 1024; + int ret; if (current->flags & PF_MEMALLOC) return; @@ -3471,9 +3475,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, if (flush_delayed) btrfs_balance_delayed_items(root); - num_dirty = root->fs_info->dirty_metadata_bytes; - - if (num_dirty > thresh) { + ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH); + if (ret > 0) { balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bda36fef301e..b9019c847c58 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3122,12 +3122,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); spin_unlock(&eb->refs_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_lock(&fs_info->delalloc_lock); - if (fs_info->dirty_metadata_bytes >= eb->len) - fs_info->dirty_metadata_bytes -= eb->len; - else - WARN_ON(1); - spin_unlock(&fs_info->delalloc_lock); + __percpu_counter_add(&fs_info->dirty_metadata_bytes, + -eb->len, + fs_info->dirty_metadata_batch); ret = 1; } else { spin_unlock(&eb->refs_lock);