From 16ff4b454f1b56e8d89a9075feed0dd6ac510c3d Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 11 Jun 2016 18:11:10 +0200 Subject: [PATCH 1/8] btrfs: Use correct format specifier Component mirror_num of struct btrfsic_block is defined as unsigned int. Use %u as format specifier. Signed-off-by: Heinrich Schuchardt Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/check-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b677a6ea6001..7706c8dc5fa6 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2645,7 +2645,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. */ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", + indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", btrfsic_get_block_type(state, block), block->logical_bytenr, block->dev_state->name, block->dev_bytenr, block->mirror_num); From c871b0f2fd27e7f9097d507f47de5270f88003b9 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 6 Jun 2016 12:01:23 -0700 Subject: [PATCH 2/8] Btrfs: check if extent buffer is aligned to sectorsize Thanks to fuzz testing, we can pass an invalid bytenr to extent buffer via alloc_extent_buffer(). An unaligned eb can have more pages than it should have, which ends up extent buffer's leak or some corrupted content in extent buffer. This adds a warning to let us quickly know what was happening. Now that alloc_extent_buffer() no more returns NULL, this changes its caller and callers of its caller to match with the new error handling. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 ++ fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 10 ++++++---- fs/btrfs/extent_io.c | 15 ++++++++++++--- fs/btrfs/tree-log.c | 4 ++-- fs/btrfs/volumes.c | 4 ++-- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 46025688f1d0..827c949fa4bc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2512,6 +2512,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); + } else { + ret = PTR_ERR(tmp); } return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1142127f6e5e..7b5d5e8efde6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1098,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); @@ -1114,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); @@ -1172,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 689d25ac6a68..5439e85c4813 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8016,8 +8016,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8659,8 +8660,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, level - 1); reada = 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a3412d68ad37..aaee3ef55ed8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4892,18 +4892,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int uptodate = 1; int ret; + if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) { + btrfs_err(fs_info, "bad tree block start %llu", start); + return ERR_PTR(-EINVAL); + } + eb = find_extent_buffer(fs_info, start); if (eb) return eb; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); - if (!p) + if (!p) { + exists = ERR_PTR(-ENOMEM); goto free_eb; + } spin_lock(&mapping->private_lock); if (PagePrivate(p)) { @@ -4948,8 +4955,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index aa8fed11f749..8ab1dc64cbba 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr); - if (!next) - return -ENOMEM; + if (IS_ERR(next)) + return PTR_ERR(next); if (*level == 1) { ret = wc->process_func(root, next, wc, ptr_gen); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fcbda4341f7d..c3a2900c6030 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6607,8 +6607,8 @@ int btrfs_read_sys_array(struct btrfs_root *root) * overallocate but we can keep it as-is, only the first page is used. */ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); - if (!sb) - return -ENOMEM; + if (IS_ERR(sb)) + return PTR_ERR(sb); set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* From 64c12921e11b3a0c10d088606e328c58e29274d8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 8 Jun 2016 00:36:38 -0400 Subject: [PATCH 3/8] btrfs: account for non-CoW'd blocks in btrfs_abort_transaction The test for !trans->blocks_used in btrfs_abort_transaction is insufficient to determine whether it's safe to drop the transaction handle on the floor. btrfs_cow_block, informed by should_cow_block, can return blocks that have already been CoW'd in the current transaction. trans->blocks_used is only incremented for new block allocations. If an operation overlaps the blocks in the current transaction entirely and must abort the transaction, we'll happily let it clean up the trans handle even though it may have modified the blocks and will commit an incomplete operation. In the long-term, I'd like to do closer tracking of when the fs is actually modified so we can still recover as gracefully as possible, but that approach will need some discussion. In the short term, since this is the only code using trans->blocks_used, let's just switch it to a bool indicating whether any blocks were used and set it when should_cow_block returns false. Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.h | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 827c949fa4bc..6276add8538a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1554,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -2777,8 +2778,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5439e85c4813..29e5d000bbee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8045,7 +8045,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4339b6613f19..bf70d33b5791 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9fe0ec2bf0fe..c5abee4f01ad 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to From 3b6571c180da85e43550c608e954ab7b2a31d954 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 May 2016 13:03:04 -0400 Subject: [PATCH 4/8] Btrfs: don't BUG_ON() in btrfs_orphan_add This is just a screwup for developers, so change it to an ASSERT() so developers notice when things go wrong and deal with the error appropriately if ASSERT() isn't enabled. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Mark Fasheh Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5558d9e396e..bb62418b8023 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3271,7 +3271,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); - BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */ + ASSERT(!ret); + if (ret) { + atomic_dec(&root->orphan_inodes); + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + if (insert) + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + return ret; + } } /* insert an orphan item to track this unlinked/truncated file */ From 90c711ab380d633bf85249bd5d819edb601feda7 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sun, 12 Jun 2016 23:39:58 -0400 Subject: [PATCH 5/8] btrfs: avoid blocking open_ctree from cleaner_kthread This fixes a problem introduced in commit 2f3165ecf103599f82bf0ea254039db335fb5005 "btrfs: don't force mounts to wait for cleaner_kthread to delete one or more subvolumes". open_ctree eventually calls btrfs_replay_log which in turn calls btrfs_commit_super which tries to lock the cleaner_mutex, causing a recursive mutex deadlock during mount. Instead of playing whack-a-mole trying to keep up with all the functions that may want to lock cleaner_mutex, put all the cleaner_mutex lockers back where they were, and attack the problem more directly: keep cleaner_kthread asleep until the filesystem is mounted. When filesystems are mounted read-only and later remounted read-write, open_ctree did not set fs_info->open and neither does anything else. Set this flag in btrfs_remount so that neither btrfs_delete_unused_bgs nor cleaner_kthread get confused by the common case of "/" filesystem read-only mount followed by read-write remount. Signed-off-by: Zygo Blaxell Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 25 ++++++++++--------------- fs/btrfs/super.c | 2 ++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7b5d5e8efde6..789f5f233940 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1806,6 +1806,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. + */ + if (!root->fs_info->open) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -2520,7 +2527,6 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; - bool cleaner_mutex_locked = false; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2999,13 +3005,6 @@ retry_root_backup: goto fail_sysfs; } - /* - * Hold the cleaner_mutex thread here so that we don't block - * for a long time on btrfs_recover_relocation. cleaner_kthread - * will wait for us to finish mounting the filesystem. - */ - mutex_lock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = true; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) @@ -3065,8 +3064,10 @@ retry_root_backup: ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; - /* We locked cleaner_mutex before creating cleaner_kthread. */ + + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { btrfs_warn(fs_info, "failed to recover relocation: %d", ret); @@ -3074,8 +3075,6 @@ retry_root_backup: goto fail_qgroup; } } - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3189,10 +3188,6 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - if (cleaner_mutex_locked) { - mutex_unlock(&fs_info->cleaner_mutex); - cleaner_mutex_locked = false; - } btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bf70d33b5791..60e7179ed4b7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } } sb->s_flags &= ~MS_RDONLY; + + fs_info->open = 1; } out: wake_up_process(fs_info->transaction_kthread); From f7af3934c2bccba261972261ac8ebcbf92a346b2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Jun 2016 18:15:25 +0200 Subject: [PATCH 6/8] btrfs: use new error message helper in qgroup_account_snapshot We've renamed btrfs_std_error, this one is left from last merge. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f6e24cb423ae..4e74b5733030 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1385,7 +1385,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, switch_commit_roots(trans->transaction, fs_info); ret = btrfs_write_and_wait_transaction(trans, src); if (ret) - btrfs_std_error(fs_info, ret, + btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction for qgroup"); out: From 89c5a5441d703ba068699524ae59f7806e9b173d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 16 Jun 2016 17:34:28 +0200 Subject: [PATCH 7/8] btrfs: remove build fixup for qgroup_account_snapshot Introduced in 2c1984f244838477aab ("btrfs: build fixup for qgroup_account_snapshot") as temporary bisectability build fixup. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e74b5733030..765845742fde 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1311,11 +1311,6 @@ int btrfs_defrag_root(struct btrfs_root *root) return ret; } -/* Bisesctability fixup, remove in 4.8 */ -#ifndef btrfs_std_error -#define btrfs_std_error btrfs_handle_fs_error -#endif - /* * Do all special snapshot related qgroup dirty hack. * From dd5c93111dc9d26e038ac437f7a403d617e82c62 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 16 Jun 2016 22:07:58 +0530 Subject: [PATCH 8/8] Btrfs: btrfs_check_super_valid: Allow 4096 as stripesize Older btrfs-progs/mkfs.btrfs sets 4096 as the stripesize. Hence restricting stripesize to be equal to sectorsize would cause super block validation to return an error on architectures where PAGE_SIZE is not equal to 4096. Hence as a workaround, this commit allows stripesize to be set to 4096 bytes. Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 789f5f233940..54cca7a1572b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4134,7 +4134,8 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, ret = -EINVAL; } if (!is_power_of_2(btrfs_super_stripesize(sb)) || - btrfs_super_stripesize(sb) != sectorsize) { + ((btrfs_super_stripesize(sb) != sectorsize) && + (btrfs_super_stripesize(sb) != 4096))) { btrfs_err(fs_info, "invalid stripesize %u", btrfs_super_stripesize(sb)); ret = -EINVAL;