From ff40adf7fbdff96860b1153332c0b1c7bab6e0c1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 24 Aug 2017 18:19:48 -0600 Subject: [PATCH 01/17] Btrfs: use the new helper wbc_to_write_flags This updates btrfs to use the helper wbc_to_write_flags which has been applied in ext4/xfs/f2fs/block. Please note that, with this, btrfs's dirty pages written by a writeback job will carry the flag REQ_BACKGROUND, which is currently used by writeback-throttle to determine whether it should go to get a request or wait. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d17783d70228..4ead6da5a645 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned int write_flags = 0; unsigned long nr_written = 0; - if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = REQ_SYNC; + write_flags = wbc_to_write_flags(wbc); trace___extent_writepage(page, inode, wbc); @@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, unsigned long i, num_pages; unsigned long bio_flags = 0; unsigned long start, end; - unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; + unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); From 5f14efd3d437205143dcffcf776e0122eae1755a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 23 Aug 2017 12:15:09 -0600 Subject: [PATCH 02/17] Btrfs: do not reset bio->bi_ops while writing bio flush_epd_write_bio() sets bio->bi_opf by itself to honor REQ_SYNC, but it's not needed at all since bio->bi_opf has set up properly in both __extent_writepage() and write_one_eb(), and in the case of write_one_eb(), it also sets REQ_META, which we will lose in flush_epd_write_bio(). This remove this unnecessary bio->bi_opf setting. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4ead6da5a645..3738d245518c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4062,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd) if (epd->bio) { int ret; - bio_set_op_attrs(epd->bio, REQ_OP_WRITE, - epd->sync_io ? REQ_SYNC : 0); - ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ epd->bio = NULL; From bea7eafdbda3ba1d4b2ccb9cca829eefb7989bb9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 22 Aug 2017 23:46:00 -0700 Subject: [PATCH 03/17] Btrfs: fix incorrect {node,sector}size endianness from BTRFS_IOC_FS_INFO fs_info->super_copy->{node,sector}size are little-endian, but the ioctl should return the values in native endianness. Use the cached values in btrfs_fs_info instead. Found with sparse. Fixes: 80a773fbfc2d ("btrfs: retrieve more info from FS_INFO ioctl") Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ae8fbf9d3de2..cf1c2ee030dd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2769,9 +2769,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, } mutex_unlock(&fs_devices->device_list_mutex); - fi_args->nodesize = fs_info->super_copy->nodesize; - fi_args->sectorsize = fs_info->super_copy->sectorsize; - fi_args->clone_alignment = fs_info->super_copy->sectorsize; + fi_args->nodesize = fs_info->nodesize; + fi_args->sectorsize = fs_info->sectorsize; + fi_args->clone_alignment = fs_info->sectorsize; if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; From 63d71450c8d817649a79e37d685523f988b9cc98 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 1 Sep 2017 17:58:47 +0900 Subject: [PATCH 04/17] btrfs: clear ordered flag on cleaning up ordered extents Commit 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang") introduced btrfs_cleanup_ordered_extents() to cleanup submitted ordered extents. However, it does not clear the ordered bit (Private2) of corresponding pages. Thus, the following BUG occurs from free_pages_check_bad() (on btrfs/125 with nospace_cache). BUG: Bad page state in process btrfs pfn:3fa787 page:ffffdf2acfe9e1c0 count:0 mapcount:0 mapping: (null) index:0xd flags: 0x8000000000002008(uptodate|private_2) raw: 8000000000002008 0000000000000000 000000000000000d 00000000ffffffff raw: ffffdf2acf5c1b20 ffffb443802238b0 0000000000000000 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x2000(private_2) This patch clears the flag same as other places calling btrfs_dec_test_ordered_pending() for every page in the specified range. Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang") Cc: # 4.12 Signed-off-by: Naohiro Aota Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d184a46e46c4..455c0f22fe2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode, const u64 offset, const u64 bytes) { + unsigned long index = offset >> PAGE_SHIFT; + unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + index++; + if (!page) + continue; + ClearPagePrivate2(page); + put_page(page); + } return __endio_write_update_ordered(inode, offset + PAGE_SIZE, bytes - PAGE_SIZE, false); } From 67c003f90fd68062d92a7ffade36f9b2a9098bd8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 1 Sep 2017 17:59:07 +0900 Subject: [PATCH 05/17] btrfs: finish ordered extent cleaning if no progress is found __endio_write_update_ordered() repeats the search until it reaches the end of the specified range. This works well with direct IO path, because before the function is called, it's ensured that there are ordered extents filling whole the range. It's not the case, however, when it's called from run_delalloc_range(): it is possible to have error in the midle of the loop in e.g. run_delalloc_nocow(), so that there exisits the range not covered by any ordered extents. By cleaning such "uncomplete" range, __endio_write_update_ordered() stucks at offset where there're no ordered extents. Since the ordered extents are created from head to tail, we can stop the search if there are no offset progress. Fixes: 524272607e88 ("btrfs: Handle delalloc error correctly to avoid ordered extent hang") Cc: # 4.12 Signed-off-by: Naohiro Aota Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 455c0f22fe2d..f78c5640c6dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8396,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode, btrfs_work_func_t func; u64 ordered_offset = offset; u64 ordered_bytes = bytes; + u64 last_offset; int ret; if (btrfs_is_free_space_inode(BTRFS_I(inode))) { @@ -8407,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode, } again: + last_offset = ordered_offset; ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, ordered_bytes, @@ -8417,6 +8419,12 @@ again: btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered->work); out_test: + /* + * If btrfs_dec_test_ordered_pending does not find any ordered extent + * in the range, we can exit. + */ + if (ordered_offset == last_offset) + return; /* * our bio might span multiple ordered extents. If we haven't * completed the accounting for the whole dio, go back and try again From bb166d7207432d3c7d10c45dc052f12ba3a2121d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 25 Aug 2017 14:15:14 +0900 Subject: [PATCH 06/17] btrfs: fix NULL pointer dereference from free_reloc_roots() __del_reloc_root should be called before freeing up reloc_root->node. If not, calling __del_reloc_root() dereference reloc_root->node, causing the system BUG. Fixes: 6bdf131fac23 ("Btrfs: don't leak reloc root nodes on error") Cc: # 4.9 Signed-off-by: Naohiro Aota Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3a49a3c2fca4..9841faef08ea 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list) while (!list_empty(list)) { reloc_root = list_entry(list->next, struct btrfs_root, root_list); + __del_reloc_root(reloc_root); free_extent_buffer(reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->node = NULL; reloc_root->commit_root = NULL; - __del_reloc_root(reloc_root); } } From ca6842bf01dc1ad41195eac1e343b4f08c496ba8 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Fri, 22 Jan 2016 09:13:25 +0900 Subject: [PATCH 07/17] Btrfs: send: fix error number for unknown inode types ENOTSUPP should not be returned to the user program. (cf. include/linux/errno.h) Therefore, EOPNOTSUPP is used instead of ENOTSUPP. Signed-off-by: Tsutomu Itoh Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8f1d3d6e7087..43430e6c99aa 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2640,7 +2640,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino) } else { btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", (int)(mode & S_IFMT)); - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; goto out; } From 6d6d282932d1a609e60dc4467677e0e863682f57 Mon Sep 17 00:00:00 2001 From: satoru takeuchi Date: Tue, 12 Sep 2017 22:42:52 +0900 Subject: [PATCH 08/17] btrfs: prevent to set invalid default subvolid `btrfs sub set-default` succeeds to set an ID which isn't corresponding to any fs/file tree. If such the bad ID is set to a filesystem, we can't mount this filesystem without specifying `subvol` or `subvolid` mount options. Fixes: 6ef5ed0d386b ("Btrfs: add ioctl and incompat flag to set the default mount subvol") Cc: Signed-off-by: Satoru Takeuchi Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index cf1c2ee030dd..d4a77993e52f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4057,6 +4057,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) ret = PTR_ERR(new_root); goto out; } + if (!is_fstree(new_root->objectid)) { + ret = -ENOENT; + goto out; + } path = btrfs_alloc_path(); if (!path) { From 78ad4ce014d025f41b8dde3a81876832ead643cf Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 8 Sep 2017 17:48:55 +0900 Subject: [PATCH 09/17] btrfs: propagate error to btrfs_cmp_data_prepare caller btrfs_cmp_data_prepare() (almost) always returns 0 i.e. ignoring errors from gather_extent_pages(). While the pages are freed by btrfs_cmp_data_free(), cmp->num_pages still has > 0. Then, btrfs_extent_same() try to access the already freed pages causing faults (or violates PageLocked assertion). This patch just return the error as is so that the caller stop the process. Signed-off-by: Naohiro Aota Fixes: f441460202cb ("btrfs: fix deadlock with extent-same and readpage") Cc: # 4.2 Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d4a77993e52f..802df5755cd3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3028,7 +3028,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, out: if (ret) btrfs_cmp_data_free(cmp); - return 0; + return ret; } static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) From c2faff790ccd11ea5be8e3ca99713f116fcd6030 Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Wed, 30 Aug 2017 16:33:16 +0900 Subject: [PATCH 10/17] btrfs: remove BTRFS_FS_QUOTA_DISABLING flag Currently, "btrfs quota enable" would fail after "btrfs quota disable" on the first time with syslog output "qgroup_rescan_init failed with -22", but it would succeed on the second time. When "quota disable" is called, BTRFS_FS_QUOTA_DISABLING flag bit will be set in fs_info->flags in btrfs_quota_disable(), but it will not be droppd in btrfs_run_qgroups() (which is called in btrfs_commit_transaction()) because quota_root has already been freed. If "quota enable" is called after that, both BTRFS_FS_QUOTA_DISABLING and BTRFS_FS_QUOTA_ENABLED flag would be dropped in the btrfs_run_qgroups() since quota_root is not NULL. This leads to the failure of "quota enable" on the first time. BTRFS_FS_QUOTA_DISABLING flag is not used outside of "quota disable" context and is equivalent to whether quota_root is NULL or not. btrfs_run_qgroups() checks whether quota_root is NULL or not in the first place. So, let's remove BTRFS_FS_QUOTA_DISABLING flag. Signed-off-by: Tomohiro Misono Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 - fs/btrfs/qgroup.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2add002662f4..b7ccfcc01732 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -708,7 +708,6 @@ struct btrfs_delayed_root; #define BTRFS_FS_OPEN 5 #define BTRFS_FS_QUOTA_ENABLED 6 #define BTRFS_FS_QUOTA_ENABLING 7 -#define BTRFS_FS_QUOTA_DISABLING 8 #define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 #define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 #define BTRFS_FS_BTREE_ERR 11 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5c8b61c86e61..770f667269f5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, } ret = 0; out: - set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); btrfs_free_path(path); return ret; } @@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, if (!fs_info->quota_root) goto out; clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; @@ -2086,8 +2084,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); spin_lock(&fs_info->qgroup_lock); while (!list_empty(&fs_info->dirty_qgroups)) { From fed3b381145e2e7c66b0b3f640851e1633ebd07f Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 13 Sep 2017 12:25:21 -0600 Subject: [PATCH 11/17] Btrfs: do not backup tree roots when fsync It doesn't make sense to backup tree roots when doing fsync, since during fsync those tree roots have not been consistent on disk. Signed-off-by: Liu Bo Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 27d458640536..0f2271815eb6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3641,7 +3641,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) u64 flags; do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); - backup_super_roots(fs_info); + + /* + * max_mirrors == 0 indicates we're from commit_transaction, + * not from fsync where the tree roots in fs_info have not + * been consistent on disk. + */ + if (max_mirrors == 0) + backup_super_roots(fs_info); sb = fs_info->super_for_commit; dev_item = &sb->dev_item; From bd7d63c2ceaf737eeb21630a2b62fc5fe34dba29 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 19 Sep 2017 17:50:09 -0600 Subject: [PATCH 12/17] Btrfs: use btrfs_op instead of bio_op in __btrfs_map_block This seems to be a leftover of commit cf8cddd38bab ("btrfs: don't abuse REQ_OP_* flags for btrfs_map_block"). It should use btrfs_op() helper to provide one of 'enum btrfs_map_op' types. Fixes: cf8cddd38bab ("btrfs: don't abuse REQ_OP_* flags for btrfs_map_block") Signed-off-by: Liu Bo Reviewed-by: Satoru Takeuchi Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d024f1b07282..6a72f88f77b6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6166,7 +6166,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, map_length = length; btrfs_bio_counter_inc_blocked(fs_info); - ret = __btrfs_map_block(fs_info, bio_op(bio), logical, + ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); From cf1167d5c1abf3bc42b2a1562bfa7937c05337e2 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 20 Sep 2017 17:50:18 -0600 Subject: [PATCH 13/17] Btrfs: fix kernel oops while reading compressed data The kernel oops happens at kernel BUG at fs/btrfs/extent_io.c:2104! ... RIP: clean_io_failure+0x263/0x2a0 [btrfs] It's showing that read-repair code is using an improper mirror index. This is due to the fact that compression read's endio hasn't recorded the failed mirror index in %cb->orig_bio. With this, btrfs's read-repair can work properly on reading compressed data. Signed-off-by: Liu Bo Reported-by: Paul Jones Tested-by: Paul Jones Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 883ecc58fd0d..c6aa53c4c102 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -107,6 +107,7 @@ static void end_compressed_bio_read(struct bio *bio) struct inode *inode; struct page *page; unsigned long index; + unsigned int mirror = btrfs_io_bio(bio)->mirror_num; int ret; if (bio->bi_status) @@ -118,6 +119,14 @@ static void end_compressed_bio_read(struct bio *bio) if (!refcount_dec_and_test(&cb->pending_bios)) goto out; + /* + * Record the correct mirror_num in cb->orig_bio so that + * read-repair can work properly. + */ + ASSERT(btrfs_io_bio(cb->orig_bio)); + btrfs_io_bio(cb->orig_bio)->mirror_num = mirror; + cb->mirror_num = mirror; + inode = cb->inode; ret = check_compressed_csum(BTRFS_I(inode), cb, (u64)bio->bi_iter.bi_sector << 9); From e6311f240c946788131ba2b97e14f37312688072 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 20 Sep 2017 17:50:19 -0600 Subject: [PATCH 14/17] Btrfs: skip checksum when reading compressed data if some IO have failed Currently even if the underlying disk reports failure on IO, compressed read endio still gets to verify checksum and reports it as a checksum error. In fact, if some IO have failed during reading a compressed data extent , there's no way the checksum could match, therefore, we can skip that in order to return error quickly to the upper layer. Please note that we need to do this after recording the failed mirror index so that read-repair in the upper layer's endio can work properly. Signed-off-by: Liu Bo Tested-by: Paul Jones Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c6aa53c4c102..fc31af98a41b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -108,7 +108,7 @@ static void end_compressed_bio_read(struct bio *bio) struct page *page; unsigned long index; unsigned int mirror = btrfs_io_bio(bio)->mirror_num; - int ret; + int ret = 0; if (bio->bi_status) cb->errors = 1; @@ -127,6 +127,13 @@ static void end_compressed_bio_read(struct bio *bio) btrfs_io_bio(cb->orig_bio)->mirror_num = mirror; cb->mirror_num = mirror; + /* + * Some IO in this cb have failed, just skip checksum as there + * is no way it could be correct. + */ + if (cb->errors == 1) + goto csum_failed; + inode = cb->inode; ret = check_compressed_csum(BTRFS_I(inode), cb, (u64)bio->bi_iter.bi_sector << 9); From 36b96fdc6b2dc6f4a0fedc563fa7508c91b90a10 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Sun, 17 Sep 2017 09:02:29 +0000 Subject: [PATCH 15/17] btrfs: Report error on removing qgroup if del_qgroup_item fails Previously, we were calling del_qgroup_item, and ignoring the return code resulting in a potential to have divergent in-memory state without an error. Perhaps, it makes sense to handle this error code, and put the filesystem into a read only, or similar state. This patch only adds reporting of the error if the error is fatal, (any error other than qgroup not found). Signed-off-by: Sargun Dhillon Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 770f667269f5..e172d4843eae 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1305,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, } } ret = del_qgroup_item(trans, quota_root, qgroupid); + if (ret && ret != -ENOENT) + goto out; while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, From 99c4e3b96c797f047be4e6b7c03cfca01959f146 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 15 Sep 2017 15:06:51 -0600 Subject: [PATCH 16/17] Btrfs: fix unexpected result when dio reading corrupted blocks commit 4246a0b63bd8 ("block: add a bi_error field to struct bio") changed the logic of how dio read endio reports errors. For single stripe dio read, %bio->bi_status reflects the error before verifying checksum, and now we're updating it when data block matches with its checksum, while in the mismatching case, %bio->bi_status is not updated to relfect that. When some blocks in a file have been corrupted on disk, reading such a file ends up with 1) checksum errors are reported in kernel log 2) read(2) returns successfully with some content being 0x01. In order to fix it, we need to report its checksum mismatch error to the upper layer (dio layer in this case) as well. Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio") Signed-off-by: Liu Bo Reported-by: Goffredo Baroncelli Tested-by: Goffredo Baroncelli Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f78c5640c6dc..c242d0230db9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8366,11 +8366,8 @@ static void btrfs_endio_direct_read(struct bio *bio) struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); blk_status_t err = bio->bi_status; - if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) { + if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) err = btrfs_subio_endio_read(inode, io_bio, err); - if (!err) - bio->bi_status = 0; - } unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, dip->logical_offset + dip->bytes - 1); @@ -8378,7 +8375,7 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); - dio_bio->bi_status = bio->bi_status; + dio_bio->bi_status = err; dio_end_io(dio_bio); if (io_bio->end_io) From 8c6c592831a09a28428448e68fb08c6bbb8b9b8b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Aug 2017 10:11:39 -0400 Subject: [PATCH 17/17] btrfs: log csums for all modified extents Amir reported a bug discovered by his cleaned up version of my dm-log-writes xfstests where we were missing csums at certain replay points. This is because fsx was doing an msync(), which essentially fsync()'s a specific range of a file. We will log all modified extents, but only search for the checksums in the range we are being asked to sync. We cannot simply log the extents in the range we're being asked because we are logging the inode item as it is currently, which if it has had a i_size update before the msync means we will miss extents when replaying. We could possibly get around this by marking the inode with the transaction that extended the i_size to see if we have this case, but this would be racy and we'd have to lock the whole range of the inode to make sure we didn't have an ordered extent outside of our range that was in the middle of completing. Fix this simply by keeping track of the modified extents range and logging the csums for the entire range of extents that we are logging. This makes the xfstest pass. Reported-by: Amir Goldstein Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ad7f4bab640b..c800d067fcbf 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct extent_map *em, *n; struct list_head extents; struct extent_map_tree *tree = &inode->extent_tree; + u64 logged_start, logged_end; u64 test_gen; int ret = 0; int num = 0; @@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; + logged_start = start; + logged_end = end; list_for_each_entry_safe(em, n, &tree->modified_extents, list) { list_del_init(&em->list); - /* * Just an arbitrary number, this can be really CPU intensive * once we start getting a lot of extents, and really once we @@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; + + if (em->start < logged_start) + logged_start = em->start; + if ((em->start + em->len - 1) > logged_end) + logged_end = em->start + em->len - 1; + /* Need a ref to keep it from getting evicted from cache */ refcount_inc(&em->refs); set_bit(EXTENT_FLAG_LOGGING, &em->flags); @@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - btrfs_get_logged_extents(inode, logged_list, start, end); + btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* * Some ordered extents started by fsync might have completed * before we could collect them into the list logged_list, which