2018-04-04 01:16:55 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
2007-06-12 21:07:21 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
|
|
*/
|
|
|
|
|
2018-04-04 01:16:55 +08:00
|
|
|
#ifndef BTRFS_DISK_IO_H
|
|
|
|
#define BTRFS_DISK_IO_H
|
2007-02-02 22:18:22 +08:00
|
|
|
|
2015-12-15 00:42:10 +08:00
|
|
|
/* Offset that btrfs_sb_offset() returns for mirror 0. */
#define BTRFS_SUPER_INFO_OFFSET		SZ_64K
/* NOTE(review): presumably the size of one superblock in bytes - confirm. */
#define BTRFS_SUPER_INFO_SIZE		4096

/*
 * NOTE(review): presumably the number of superblock copies kept per device;
 * confirm against the callers of btrfs_sb_offset().
 */
#define BTRFS_SUPER_MIRROR_MAX		3
/* Shift applied once per mirror index by btrfs_sb_offset(). */
#define BTRFS_SUPER_MIRROR_SHIFT	12
|
|
|
|
|
2017-06-16 07:48:05 +08:00
|
|
|
/*
 * Fixed block size used for all devices.  It applies to the specific ways
 * of reading metadata such as the superblock, and it must meet the
 * requirements of set_blocksize().
 *
 * Do not change.
 */
#define BTRFS_BDEV_BLOCKSIZE	(4096)
|
|
|
|
|
2014-07-30 06:25:45 +08:00
|
|
|
/*
 * Type of the last argument of btrfs_bio_wq_end_io().
 *
 * NOTE(review): judging by the names, each value identifies the kind of I/O
 * whose completion is being deferred - confirm against the implementation.
 * The enumerators rely on implicit numbering starting at 0; keep the order.
 */
enum btrfs_wq_endio_type {
	BTRFS_WQ_ENDIO_DATA,
	BTRFS_WQ_ENDIO_METADATA,
	BTRFS_WQ_ENDIO_FREE_SPACE,
	BTRFS_WQ_ENDIO_RAID56,
};
|
|
|
|
|
2008-12-09 05:46:26 +08:00
|
|
|
static inline u64 btrfs_sb_offset(int mirror)
|
|
|
|
{
|
2015-12-15 00:42:10 +08:00
|
|
|
u64 start = SZ_16K;
|
2008-12-09 05:46:26 +08:00
|
|
|
if (mirror)
|
|
|
|
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
|
|
|
|
return BTRFS_SUPER_INFO_OFFSET;
|
|
|
|
}
|
|
|
|
|
2008-03-25 03:01:56 +08:00
|
|
|
struct btrfs_device;
|
2008-03-25 03:02:07 +08:00
|
|
|
struct btrfs_fs_devices;
|
2007-03-23 00:13:20 +08:00
|
|
|
|
2020-01-24 22:33:00 +08:00
|
|
|
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
|
2020-01-24 22:32:59 +08:00
|
|
|
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
|
2019-03-20 21:58:13 +08:00
|
|
|
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
btrfs: Check the first key and level for cached extent buffer
[BUG]
When reading a file from a fuzzed image, kernel can panic like:
BTRFS warning (device loop0): csum failed root 5 ino 270 off 0 csum 0x98f94189 expected csum 0x00000000 mirror 1
assertion failed: !memcmp_extent_buffer(b, &disk_key, offsetof(struct btrfs_leaf, items[0].key), sizeof(disk_key)), file: fs/btrfs/ctree.c, line: 2544
------------[ cut here ]------------
kernel BUG at fs/btrfs/ctree.h:3500!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
RIP: 0010:btrfs_search_slot.cold.24+0x61/0x63 [btrfs]
Call Trace:
btrfs_lookup_csum+0x52/0x150 [btrfs]
__btrfs_lookup_bio_sums+0x209/0x640 [btrfs]
btrfs_submit_bio_hook+0x103/0x170 [btrfs]
submit_one_bio+0x59/0x80 [btrfs]
extent_read_full_page+0x58/0x80 [btrfs]
generic_file_read_iter+0x2f6/0x9d0
__vfs_read+0x14d/0x1a0
vfs_read+0x8d/0x140
ksys_read+0x52/0xc0
do_syscall_64+0x60/0x210
entry_SYSCALL_64_after_hwframe+0x49/0xbe
[CAUSE]
The fuzzed image has a corrupted leaf whose first key doesn't match its
parent:
checksum tree key (CSUM_TREE ROOT_ITEM 0)
node 29741056 level 1 items 14 free 107 generation 19 owner CSUM_TREE
fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6
chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c
...
key (EXTENT_CSUM EXTENT_CSUM 79691776) block 29761536 gen 19
leaf 29761536 items 1 free space 1726 generation 19 owner CSUM_TREE
leaf 29761536 flags 0x1(WRITTEN) backref revision 1
fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6
chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c
item 0 key (EXTENT_CSUM EXTENT_CSUM 8798638964736) itemoff 1751 itemsize 2244
range start 8798638964736 end 8798641262592 length 2297856
When reading the above tree block, we have extent_buffer->refs = 2 in
the context:
- initial one from __alloc_extent_buffer()
alloc_extent_buffer()
|- __alloc_extent_buffer()
|- atomic_set(&eb->refs, 1)
- one being added to fs_info->buffer_radix
alloc_extent_buffer()
|- check_buffer_tree_ref()
|- atomic_inc(&eb->refs)
So even if we call free_extent_buffer() in read_tree_block or other
similar situation, we only decrease the refs by 1, it doesn't reach 0
and won't be freed right now.
The stale eb and its corrupted content will still be kept cached.
Furthermore, we have several extra cases where we either don't do first
key check or the check is not proper for all callers:
- scrub
We just don't have first key in this context.
- shared tree block
One tree block can be shared by several snapshot/subvolume trees.
In that case, the first key check for one subvolume doesn't apply to
another.
So for the above reasons, a corrupted extent buffer can sneak into the
buffer cache.
[FIX]
Call verify_level_key in read_block_for_search to do another
verification. For that purpose the function is exported.
Due to above reasons, although we can free corrupted extent buffer from
cache, we still need the check in read_block_for_search(), for scrub and
shared tree blocks.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202757
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202759
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202761
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202767
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202769
Reported-by: Yoon Jungyeon <jungyeon@gatech.edu>
CC: stable@vger.kernel.org # 4.19+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-03-12 17:10:40 +08:00
|
|
|
struct btrfs_key *first_key, u64 parent_transid);
|
2018-03-29 09:08:11 +08:00
|
|
|
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
2020-11-05 23:45:18 +08:00
|
|
|
u64 owner_root, u64 parent_transid,
|
|
|
|
int level, struct btrfs_key *first_key);
|
2016-06-23 06:54:24 +08:00
|
|
|
struct extent_buffer *btrfs_find_create_tree_block(
|
|
|
|
struct btrfs_fs_info *fs_info,
|
2020-11-05 23:45:20 +08:00
|
|
|
u64 bytenr, u64 owner_root,
|
|
|
|
int level);
|
2019-03-20 21:30:02 +08:00
|
|
|
void btrfs_clean_tree_block(struct extent_buffer *buf);
|
2020-11-19 07:06:20 +08:00
|
|
|
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
|
btrfs: lift read-write mount setup from mount and remount
Mounting rw and remounting from ro to rw naturally share invariants and
functionality which result in a correctly setup rw filesystem. Luckily,
there is even a strong unity in the code which implements them. In
mount's open_ctree, these operations mostly happen after an early return
for ro file systems, and in remount, they happen in a section devoted to
remounting ro->rw, after some remount specific validation passes.
However, there are unfortunately a few differences. There are small
deviations in the order of some of the operations, remount does not
start orphan cleanup in root_tree or fs_tree, remount does not create
the free space tree, and remount does not handle "one-shot" mount
options like clear_cache and uuid tree rescan.
Since we want to add building the free space tree to remount, and also
to start the same orphan cleanup process on a filesystem mounted as ro
then remounted rw, we would benefit from unifying the logic between the
two code paths.
This patch only lifts the existing common functionality, and leaves a
natural path for fixing the discrepancies.
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2020-11-19 07:06:16 +08:00
|
|
|
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
|
2019-10-02 01:57:35 +08:00
|
|
|
int __cold open_ctree(struct super_block *sb,
|
2011-11-17 14:10:02 +08:00
|
|
|
struct btrfs_fs_devices *fs_devices,
|
|
|
|
char *options);
|
2019-10-02 01:57:35 +08:00
|
|
|
void __cold close_ctree(struct btrfs_fs_info *fs_info);
|
2017-02-11 02:04:32 +08:00
|
|
|
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
|
2020-02-13 23:24:32 +08:00
|
|
|
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
|
|
|
|
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
|
|
|
|
int copy_num);
|
2016-06-22 09:16:51 +08:00
|
|
|
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
|
2020-01-24 22:32:21 +08:00
|
|
|
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
|
|
|
|
struct btrfs_key *key);
|
2013-05-15 15:48:19 +08:00
|
|
|
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_root *root);
|
2014-05-08 05:06:09 +08:00
|
|
|
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
2013-09-25 21:47:44 +08:00
|
|
|
|
|
|
|
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
2020-05-16 01:35:55 +08:00
|
|
|
u64 objectid, bool check_ref);
|
2020-06-16 10:17:36 +08:00
|
|
|
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
|
|
|
|
u64 objectid, dev_t anon_dev);
|
2020-10-20 04:02:31 +08:00
|
|
|
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_path *path,
|
|
|
|
u64 objectid);
|
2013-09-25 21:47:44 +08:00
|
|
|
|
2020-01-24 22:32:53 +08:00
|
|
|
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
2008-11-13 03:34:12 +08:00
|
|
|
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
2016-06-23 06:54:24 +08:00
|
|
|
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
|
|
|
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
2013-05-15 15:48:19 +08:00
|
|
|
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|
|
|
struct btrfs_root *root);
|
2020-11-12 16:47:57 +08:00
|
|
|
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
|
2020-09-18 21:34:33 +08:00
|
|
|
struct page *page, u64 start, u64 end,
|
|
|
|
int mirror);
|
2020-09-18 21:34:38 +08:00
|
|
|
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
|
|
|
int mirror_num, unsigned long bio_flags);
|
2013-09-20 04:07:01 +08:00
|
|
|
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
2016-06-15 21:22:56 +08:00
|
|
|
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
2013-09-20 04:07:01 +08:00
|
|
|
#endif
|
|
|
|
|
2013-05-15 15:48:20 +08:00
|
|
|
/*
|
|
|
|
* This function is used to grab the root, and avoid it is freed when we
|
|
|
|
* access it. But it doesn't ensure that the tree is not dropped.
|
|
|
|
*
|
|
|
|
* If you want to ensure the whole tree is safe, you should use
|
|
|
|
* fs_info->subvol_srcu
|
|
|
|
*/
|
2020-01-24 22:33:01 +08:00
|
|
|
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
2013-05-15 15:48:20 +08:00
|
|
|
{
|
2020-01-24 22:32:26 +08:00
|
|
|
if (!root)
|
|
|
|
return NULL;
|
2017-03-03 16:55:18 +08:00
|
|
|
if (refcount_inc_not_zero(&root->refs))
|
2013-05-15 15:48:20 +08:00
|
|
|
return root;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-02-15 05:11:42 +08:00
|
|
|
void btrfs_put_root(struct btrfs_root *root);
|
2007-10-16 04:14:19 +08:00
|
|
|
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
2012-05-06 19:23:47 +08:00
|
|
|
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
|
|
|
int atomic);
|
2018-03-29 09:08:11 +08:00
|
|
|
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
|
|
|
|
struct btrfs_key *first_key);
|
2017-06-03 15:38:06 +08:00
|
|
|
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
2014-07-30 06:25:45 +08:00
|
|
|
enum btrfs_wq_endio_type metadata);
|
2020-10-21 14:24:53 +08:00
|
|
|
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
|
|
|
int mirror_num, unsigned long bio_flags,
|
2020-12-02 14:47:57 +08:00
|
|
|
u64 dio_file_offset,
|
2020-10-21 14:24:53 +08:00
|
|
|
extent_submit_bio_start_t *submit_bio_start);
|
2018-07-18 23:36:24 +08:00
|
|
|
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
|
|
|
|
int mirror_num);
|
2021-02-04 18:22:17 +08:00
|
|
|
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root);
|
2008-09-06 04:13:11 +08:00
|
|
|
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_fs_info *fs_info);
|
2009-01-22 01:54:03 +08:00
|
|
|
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root);
|
2016-07-21 08:44:12 +08:00
|
|
|
void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
|
2016-06-23 06:54:24 +08:00
|
|
|
struct btrfs_fs_info *fs_info);
|
2012-03-02 00:24:58 +08:00
|
|
|
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
|
2016-06-23 06:54:24 +08:00
|
|
|
struct btrfs_fs_info *fs_info);
|
2011-09-13 18:44:20 +08:00
|
|
|
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
|
|
|
u64 objectid);
|
|
|
|
int btree_lock_page_hook(struct page *page, void *data,
|
|
|
|
void (*flush_fn)(void *));
|
2015-08-19 15:54:15 +08:00
|
|
|
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
2020-12-07 23:32:33 +08:00
|
|
|
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
|
2020-12-07 23:32:32 +08:00
|
|
|
int btrfs_init_root_free_objectid(struct btrfs_root *root);
|
2014-07-30 06:55:42 +08:00
|
|
|
int __init btrfs_end_io_wq_init(void);
|
2018-02-20 00:24:18 +08:00
|
|
|
void __cold btrfs_end_io_wq_exit(void);
|
2009-02-13 03:09:45 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
2011-07-27 04:11:19 +08:00
|
|
|
void btrfs_set_buffer_lockdep_class(u64 objectid,
|
|
|
|
struct extent_buffer *eb, int level);
|
2009-02-13 03:09:45 +08:00
|
|
|
#else
|
2011-07-27 04:11:19 +08:00
|
|
|
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
|
|
|
|
struct extent_buffer *eb, int level)
|
2009-02-13 03:09:45 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
#endif
|
2018-04-04 01:16:55 +08:00
|
|
|
|
2007-02-02 22:18:22 +08:00
|
|
|
#endif
|