for-6.2-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmOSLtIACgkQxWXV+ddt WDvpQA//dQ3Wosz5puFNiZvoSUn/BnYJueZHjwF0bWY8OYINkF1PvDenu/WotyFz Ozf4Yl4Afxncz+FjDnOtlpr6KsSU5NqdGM3NrY0eNsxd2t1KrTsN0LgkA4m24p8b YsYp7pygbMm7c+h0X4uFpebY4lABkEPCBXnI//ktsls0xG5sOvGfZA3rdUP0bou2 JTn6hk+s0cLTNoTiOCGNHRJbeTzHLR0viZj/E4LCJfCeJvAmOLZamUjqe9sBNYAg YtsrZTpUIL3JgmRi5B6jG4fHSXOnE14mKmRIR3xPME6J6eoYyNOeuSh1oNmJEuoE B7nD5We+x5+isjXNw/V5CQrs7FF09UbdpbNb9NF5CYQWv40OCeefuai1opGtBUxX dvbfmf1blYpWW/wfFOKQwMOsl8kZIZYx68FW2OBUNglB6yRpX/3QgFSGb8kPCr83 DW2ttqwkpSNPMKk92I/owIc4BRvZ+LMR/PimEHB/Sa2apZA2/L+7RGwoaaei1QNX 1tJxHWeJFLDZ+YRxjO1eKqhWdGQPn1kkq8LoXLi3tGaNF4kYQfhWOSM3WRowvx1q f99XRgA8JQnqZS83zqRIspWlpFK0CFdvzG1Zlqx+eoxERfeaMNA2fHxv1YCyFV4+ TiXgsnCo+PIBwlvL/HjUWZgYE9+AD+NN5vyoE2UDYff4AgBFTE8= =Nqg9 -----END PGP SIGNATURE----- Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "This round there are a lot of cleanups and moved code so the diffstat looks huge, otherwise there are some nice performance improvements and an update to raid56 reliability. 
User visible features: - raid56 reliability vs performance trade off: - fix destructive RMW for raid5 data (raid6 still needs work): do full checksum verification for all data during RMW cycle, this should prevent rewriting potentially corrupted data without notice - stripes are cached in memory which should reduce the performance impact but still can hurt some workloads - checksums are verified after repair again - this is the last option without introducing additional features (write intent bitmap, journal, another tree), the extra checksum read/verification was supposed to be avoided by the original implementation exactly for performance reasons but that caused all the reliability problems - discard=async by default for devices that support it - implement emergency flush reserve to avoid almost all unnecessary transaction aborts due to ENOSPC in cases where there are too many delayed refs or delayed allocation - skip block group synchronization if there's no change in used bytes, can reduce transaction commit count for some workloads Performance improvements: - fiemap and lseek: - overall speedup due to skipping unnecessary or duplicate searches (-40% run time) - cache some data structures and sharedness of extents (-30% run time) - send: - faster backref resolution when finding clones - cached leaf to root mapping for faster backref walking - improved clone/sharing detection - overall run time improvements (-70%) Core: - module initialization converted to a table of function pointers run in a sequence - preparation for fscrypt, extend passing file names across calls, dir item can store encryption status - raid56 updates: - more accurate error tracking of sectors within stripe - simplify recovery path and remove dedicated endio worker kthread - simplify scrub call paths - refactoring to support the extra data checksum verification during RMW cycle - tree block parentness checks consolidated and done at metadata read time - improved error handling - cleanups: - 
move a lot of code for better synchronization between kernel and user space sources, split big files - enum cleanups - GFP flag cleanups - header file cleanups, prototypes, dependencies - redundant parameter cleanups - inline extent handling simplifications - inode parameter conversion - data structure cleanups, reductions, renames, merges" * tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits) btrfs: print transaction aborted messages with an error level btrfs: sync some cleanups from progs into uapi/btrfs.h btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range btrfs: fix extent map use-after-free when handling missing device in read_one_chunk btrfs: remove outdated logic from overwrite_item() and add assertion btrfs: unify overwrite_item() and do_overwrite_item() btrfs: replace strncpy() with strscpy() btrfs: fix uninitialized variable in find_first_clear_extent_bit btrfs: fix uninitialized parent in insert_state btrfs: add might_sleep() annotations btrfs: add stack helpers for a few btrfs items btrfs: add nr_global_roots to the super block definition btrfs: remove BTRFS_LEAF_DATA_OFFSET btrfs: add helpers for manipulating leaf items and data btrfs: add eb to btrfs_node_key_ptr_offset btrfs: pass the extent buffer for the btrfs_item_nr helpers btrfs: move the csum helpers into ctree.h btrfs: move eb offset helpers into extent_io.h btrfs: move file_extent_item helpers into file-item.h btrfs: move leaf_data_end into ctree.c ...
This commit is contained in:
commit
149c51f876
|
@ -23,15 +23,15 @@ obj-$(CONFIG_BTRFS_FS) := btrfs.o
|
|||
|
||||
btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
file-item.o inode-item.o disk-io.o \
|
||||
transaction.o inode.o file.o tree-defrag.o \
|
||||
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
|
||||
transaction.o inode.o file.o defrag.o \
|
||||
extent_map.o sysfs.o accessors.o xattr.o ordered-data.o \
|
||||
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
|
||||
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
|
||||
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
|
||||
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
|
||||
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
||||
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
|
||||
subpage.o tree-mod-log.o extent-io-tree.o
|
||||
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "accessors.h"
|
||||
|
||||
static bool check_setget_bounds(const struct extent_buffer *eb,
|
||||
const void *ptr, unsigned off, int size)
|
||||
|
@ -23,6 +24,13 @@ static bool check_setget_bounds(const struct extent_buffer *eb,
|
|||
return true;
|
||||
}
|
||||
|
||||
void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
|
||||
{
|
||||
token->eb = eb;
|
||||
token->kaddr = page_address(eb->pages[0]);
|
||||
token->offset = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Macro templates that define helpers to read/write extent buffer data of a
|
||||
* given size, that are also used via ctree.h for access to item members by
|
||||
|
@ -160,7 +168,7 @@ DEFINE_BTRFS_SETGET_BITS(64)
|
|||
void btrfs_node_key(const struct extent_buffer *eb,
|
||||
struct btrfs_disk_key *disk_key, int nr)
|
||||
{
|
||||
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
|
||||
unsigned long ptr = btrfs_node_key_ptr_offset(eb, nr);
|
||||
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
|
||||
struct btrfs_key_ptr, key, disk_key);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -11,10 +11,10 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "ctree.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
|
||||
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_ACL_H
|
||||
#define BTRFS_ACL_H
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
|
||||
|
||||
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
|
||||
int btrfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
struct posix_acl *acl, int type);
|
||||
int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
struct posix_acl *acl, int type);
|
||||
|
||||
#else
|
||||
|
||||
#define btrfs_get_acl NULL
|
||||
#define btrfs_set_acl NULL
|
||||
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct posix_acl *acl,
|
||||
int type)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -7,10 +7,128 @@
|
|||
#define BTRFS_BACKREF_H
|
||||
|
||||
#include <linux/btrfs.h>
|
||||
#include "messages.h"
|
||||
#include "ulist.h"
|
||||
#include "disk-io.h"
|
||||
#include "extent_io.h"
|
||||
|
||||
/*
|
||||
* Used by implementations of iterate_extent_inodes_t (see definition below) to
|
||||
* signal that backref iteration can stop immediately and no error happened.
|
||||
* The value must be non-negative and must not be 0, 1 (which is a common return
|
||||
* value from things like btrfs_search_slot() and used internally in the backref
|
||||
* walking code) and different from BACKREF_FOUND_SHARED and
|
||||
* BACKREF_FOUND_NOT_SHARED
|
||||
*/
|
||||
#define BTRFS_ITERATE_EXTENT_INODES_STOP 5
|
||||
|
||||
/*
|
||||
* Should return 0 if no errors happened and iteration of backrefs should
|
||||
* continue. Can return BTRFS_ITERATE_EXTENT_INODES_STOP or any other non-zero
|
||||
* value to immediately stop iteration and possibly signal an error back to
|
||||
* the caller.
|
||||
*/
|
||||
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes,
|
||||
u64 root, void *ctx);
|
||||
|
||||
/*
|
||||
* Context and arguments for backref walking functions. Some of the fields are
|
||||
* to be filled by the caller of such functions while others are filled by the
|
||||
* functions themselves, as described below.
|
||||
*/
|
||||
struct btrfs_backref_walk_ctx {
|
||||
/*
|
||||
* The address of the extent for which we are doing backref walking.
|
||||
* Can be either a data extent or a metadata extent.
|
||||
*
|
||||
* Must always be set by the top level caller.
|
||||
*/
|
||||
u64 bytenr;
|
||||
/*
|
||||
* Offset relative to the target extent. This is only used for data
|
||||
* extents, and it's meaningful because we can have file extent items
|
||||
* that point only to a section of a data extent ("bookend" extents),
|
||||
* and we want to filter out any that don't point to a section of the
|
||||
* data extent containing the given offset.
|
||||
*
|
||||
* Must always be set by the top level caller.
|
||||
*/
|
||||
u64 extent_item_pos;
|
||||
/*
|
||||
* If true and bytenr corresponds to a data extent, then references from
|
||||
* all file extent items that point to the data extent are considered,
|
||||
* @extent_item_pos is ignored.
|
||||
*/
|
||||
bool ignore_extent_item_pos;
|
||||
/* A valid transaction handle or NULL. */
|
||||
struct btrfs_trans_handle *trans;
|
||||
/*
|
||||
* The file system's info object, can not be NULL.
|
||||
*
|
||||
* Must always be set by the top level caller.
|
||||
*/
|
||||
struct btrfs_fs_info *fs_info;
|
||||
/*
|
||||
* Time sequence acquired from btrfs_get_tree_mod_seq(), in case the
|
||||
* caller joined the tree mod log to get a consistent view of b+trees
|
||||
* while we do backref walking, or BTRFS_SEQ_LAST.
|
||||
* When using BTRFS_SEQ_LAST, delayed refs are not checked and it uses
|
||||
* commit roots when searching b+trees - this is a special case for
|
||||
* qgroups used during a transaction commit.
|
||||
*/
|
||||
u64 time_seq;
|
||||
/*
|
||||
* Used to collect the bytenr of metadata extents that point to the
|
||||
* target extent.
|
||||
*/
|
||||
struct ulist *refs;
|
||||
/*
|
||||
* List used to collect the IDs of the roots from which the target
|
||||
* extent is accessible. Can be NULL in case the caller does not care
|
||||
* about collecting root IDs.
|
||||
*/
|
||||
struct ulist *roots;
|
||||
/*
|
||||
* Used by iterate_extent_inodes() and the main backref walk code
|
||||
* (find_parent_nodes()). Lookup and store functions for an optional
|
||||
* cache which maps the logical address (bytenr) of leaves to an array
|
||||
* of root IDs.
|
||||
*/
|
||||
bool (*cache_lookup)(u64 leaf_bytenr, void *user_ctx,
|
||||
const u64 **root_ids_ret, int *root_count_ret);
|
||||
void (*cache_store)(u64 leaf_bytenr, const struct ulist *root_ids,
|
||||
void *user_ctx);
|
||||
/*
|
||||
* If this is not NULL, then the backref walking code will call this
|
||||
* for each indirect data extent reference as soon as it finds one,
|
||||
* before collecting all the remaining backrefs and before resolving
|
||||
* indirect backrefs. This allows for the caller to terminate backref
|
||||
* walking as soon as it finds one backref that matches some specific
|
||||
* criteria. The @cache_lookup and @cache_store callbacks should not
|
||||
* be NULL in order to use this callback.
|
||||
*/
|
||||
iterate_extent_inodes_t *indirect_ref_iterator;
|
||||
/*
|
||||
* If this is not NULL, then the backref walking code will call this for
|
||||
* each extent item it's meant to process before it actually starts
|
||||
* processing it. If this returns anything other than 0, then it stops
|
||||
* the backref walking code immediately.
|
||||
*/
|
||||
int (*check_extent_item)(u64 bytenr, const struct btrfs_extent_item *ei,
|
||||
const struct extent_buffer *leaf, void *user_ctx);
|
||||
/*
|
||||
* If this is not NULL, then the backref walking code will call this for
|
||||
* each extent data ref it finds (BTRFS_EXTENT_DATA_REF_KEY keys) before
|
||||
* processing that data ref. If this callback return false, then it will
|
||||
* ignore this data ref and it will never resolve the indirect data ref,
|
||||
* saving time searching for leaves in a fs tree with file extent items
|
||||
* matching the data ref.
|
||||
*/
|
||||
bool (*skip_data_ref)(u64 root, u64 ino, u64 offset, void *user_ctx);
|
||||
/* Context object to pass to the callbacks defined above. */
|
||||
void *user_ctx;
|
||||
};
|
||||
|
||||
struct inode_fs_paths {
|
||||
struct btrfs_path *btrfs_path;
|
||||
struct btrfs_root *fs_root;
|
||||
|
@ -23,17 +141,59 @@ struct btrfs_backref_shared_cache_entry {
|
|||
bool is_shared;
|
||||
};
|
||||
|
||||
struct btrfs_backref_shared_cache {
|
||||
#define BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE 8
|
||||
|
||||
struct btrfs_backref_share_check_ctx {
|
||||
/* Ulists used during backref walking. */
|
||||
struct ulist refs;
|
||||
/*
|
||||
* The current leaf the caller of btrfs_is_data_extent_shared() is at.
|
||||
* Typically the caller (at the moment only fiemap) tries to determine
|
||||
* the sharedness of data extents pointed to by file extent items from entire
|
||||
* leaves.
|
||||
*/
|
||||
u64 curr_leaf_bytenr;
|
||||
/*
|
||||
* The previous leaf the caller was at in the previous call to
|
||||
* btrfs_is_data_extent_shared(). This may be the same as the current
|
||||
* leaf. On the first call it must be 0.
|
||||
*/
|
||||
u64 prev_leaf_bytenr;
|
||||
/*
|
||||
* A path from a root to a leaf that has a file extent item pointing to
|
||||
* a given data extent should never exceed the maximum b+tree height.
|
||||
*/
|
||||
struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
|
||||
bool use_cache;
|
||||
struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL];
|
||||
bool use_path_cache;
|
||||
/*
|
||||
* Cache the sharedness result for the last few extents we have found,
|
||||
* but only for extents for which we have multiple file extent items
|
||||
* that point to them.
|
||||
* It's very common to have several file extent items that point to the
|
||||
* same extent (bytenr) but with different offsets and lengths. This
|
||||
* typically happens for COW writes, partial writes into prealloc
|
||||
* extents, NOCOW writes after snapshotting a root, hole punching or
|
||||
* reflinking within the same file (less common perhaps).
|
||||
* So keep a small cache with the lookup results for the extent pointed
|
||||
* by the last few file extent items. This cache is checked, with a
|
||||
* linear scan, whenever btrfs_is_data_extent_shared() is called, so
|
||||
* it must be small so that it does not negatively affect performance in
|
||||
* case we don't have multiple file extent items that point to the same
|
||||
* data extent.
|
||||
*/
|
||||
struct {
|
||||
u64 bytenr;
|
||||
bool is_shared;
|
||||
} prev_extents_cache[BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE];
|
||||
/*
|
||||
* The slot in the prev_extents_cache array that will be used for
|
||||
* storing the sharedness result of a new data extent.
|
||||
*/
|
||||
int prev_extents_cache_slot;
|
||||
};
|
||||
|
||||
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
|
||||
void *ctx);
|
||||
struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void);
|
||||
void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx);
|
||||
|
||||
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
|
||||
struct btrfs_path *path, struct btrfs_key *found_key,
|
||||
|
@ -43,11 +203,9 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
|
|||
struct btrfs_key *key, struct btrfs_extent_item *ei,
|
||||
u32 item_size, u64 *out_root, u8 *out_level);
|
||||
|
||||
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
||||
u64 extent_item_objectid,
|
||||
u64 extent_offset, int search_commit_root,
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset);
|
||||
int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
|
||||
bool search_commit_root,
|
||||
iterate_extent_inodes_t *iterate, void *user_ctx);
|
||||
|
||||
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, void *ctx,
|
||||
|
@ -55,13 +213,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
|||
|
||||
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
|
||||
|
||||
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **leafs,
|
||||
const u64 *extent_item_pos, bool ignore_offset);
|
||||
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **roots,
|
||||
int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx);
|
||||
int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx,
|
||||
bool skip_commit_root_sem);
|
||||
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
||||
u32 name_len, unsigned long name_off,
|
||||
|
@ -77,10 +230,9 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
|
|||
u64 start_off, struct btrfs_path *path,
|
||||
struct btrfs_inode_extref **ret_extref,
|
||||
u64 *found_off);
|
||||
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
|
||||
u64 extent_gen,
|
||||
struct ulist *roots, struct ulist *tmp,
|
||||
struct btrfs_backref_shared_cache *cache);
|
||||
struct btrfs_backref_share_check_ctx *ctx);
|
||||
|
||||
int __init btrfs_prelim_ref_init(void);
|
||||
void __cold btrfs_prelim_ref_exit(void);
|
||||
|
@ -111,8 +263,7 @@ struct btrfs_backref_iter {
|
|||
u32 end_ptr;
|
||||
};
|
||||
|
||||
struct btrfs_backref_iter *btrfs_backref_iter_alloc(
|
||||
struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
|
||||
struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info);
|
||||
|
||||
static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,381 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
* Copyright (C) 2022 Christoph Hellwig.
|
||||
*/
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include "bio.h"
|
||||
#include "ctree.h"
|
||||
#include "volumes.h"
|
||||
#include "raid56.h"
|
||||
#include "async-thread.h"
|
||||
#include "check-integrity.h"
|
||||
#include "dev-replace.h"
|
||||
#include "rcu-string.h"
|
||||
#include "zoned.h"
|
||||
|
||||
static struct bio_set btrfs_bioset;
|
||||
|
||||
/*
|
||||
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
|
||||
* is already initialized by the block layer.
|
||||
*/
|
||||
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
|
||||
btrfs_bio_end_io_t end_io, void *private)
|
||||
{
|
||||
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
|
||||
bbio->end_io = end_io;
|
||||
bbio->private = private;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
|
||||
* btrfs, and is used for all I/O submitted through btrfs_submit_bio.
|
||||
*
|
||||
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
|
||||
* a mempool.
|
||||
*/
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
|
||||
btrfs_bio_end_io_t end_io, void *private)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
|
||||
btrfs_bio_init(btrfs_bio(bio), end_io, private);
|
||||
return bio;
|
||||
}
|
||||
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
|
||||
btrfs_bio_end_io_t end_io, void *private)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct btrfs_bio *bbio;
|
||||
|
||||
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
|
||||
|
||||
bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
|
||||
bbio = btrfs_bio(bio);
|
||||
btrfs_bio_init(bbio, end_io, private);
|
||||
|
||||
bio_trim(bio, offset >> 9, size >> 9);
|
||||
bbio->iter = bio->bi_iter;
|
||||
return bio;
|
||||
}
|
||||
|
||||
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
|
||||
{
|
||||
if (!dev || !dev->bdev)
|
||||
return;
|
||||
if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
|
||||
return;
|
||||
|
||||
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
|
||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
|
||||
if (!(bio->bi_opf & REQ_RAHEAD))
|
||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
|
||||
}
|
||||
|
||||
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
|
||||
struct bio *bio)
|
||||
{
|
||||
if (bio->bi_opf & REQ_META)
|
||||
return fs_info->endio_meta_workers;
|
||||
return fs_info->endio_workers;
|
||||
}
|
||||
|
||||
static void btrfs_end_bio_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
|
||||
|
||||
bbio->end_io(bbio);
|
||||
}
|
||||
|
||||
static void btrfs_simple_end_io(struct bio *bio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bio->bi_private;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
|
||||
if (bio->bi_status)
|
||||
btrfs_log_dev_io_error(bio, bbio->device);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ) {
|
||||
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
|
||||
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
|
||||
} else {
|
||||
bbio->end_io(bbio);
|
||||
}
|
||||
}
|
||||
|
||||
static void btrfs_raid56_end_io(struct bio *bio)
|
||||
{
|
||||
struct btrfs_io_context *bioc = bio->bi_private;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
|
||||
btrfs_bio_counter_dec(bioc->fs_info);
|
||||
bbio->mirror_num = bioc->mirror_num;
|
||||
bbio->end_io(bbio);
|
||||
|
||||
btrfs_put_bioc(bioc);
|
||||
}
|
||||
|
||||
static void btrfs_orig_write_end_io(struct bio *bio)
|
||||
{
|
||||
struct btrfs_io_stripe *stripe = bio->bi_private;
|
||||
struct btrfs_io_context *bioc = stripe->bioc;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
|
||||
btrfs_bio_counter_dec(bioc->fs_info);
|
||||
|
||||
if (bio->bi_status) {
|
||||
atomic_inc(&bioc->error);
|
||||
btrfs_log_dev_io_error(bio, stripe->dev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Only send an error to the higher layers if it is beyond the tolerance
|
||||
* threshold.
|
||||
*/
|
||||
if (atomic_read(&bioc->error) > bioc->max_errors)
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
else
|
||||
bio->bi_status = BLK_STS_OK;
|
||||
|
||||
bbio->end_io(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
}
|
||||
|
||||
static void btrfs_clone_write_end_io(struct bio *bio)
|
||||
{
|
||||
struct btrfs_io_stripe *stripe = bio->bi_private;
|
||||
|
||||
if (bio->bi_status) {
|
||||
atomic_inc(&stripe->bioc->error);
|
||||
btrfs_log_dev_io_error(bio, stripe->dev);
|
||||
}
|
||||
|
||||
/* Pass on control to the original bio this one was cloned from */
|
||||
bio_endio(stripe->bioc->orig_bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
|
||||
{
|
||||
if (!dev || !dev->bdev ||
|
||||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
|
||||
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
|
||||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_set_dev(bio, dev->bdev);
|
||||
|
||||
/*
|
||||
* For zone append writing, bi_sector must point the beginning of the
|
||||
* zone
|
||||
*/
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||
|
||||
if (btrfs_dev_is_sequential(dev, physical)) {
|
||||
u64 zone_start = round_down(physical,
|
||||
dev->fs_info->zone_size);
|
||||
|
||||
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
|
||||
} else {
|
||||
bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
|
||||
bio->bi_opf |= REQ_OP_WRITE;
|
||||
}
|
||||
}
|
||||
btrfs_debug_in_rcu(dev->fs_info,
|
||||
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
|
||||
__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
|
||||
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
|
||||
dev->devid, bio->bi_iter.bi_size);
|
||||
|
||||
btrfsic_check_bio(bio);
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
|
||||
{
|
||||
struct bio *orig_bio = bioc->orig_bio, *bio;
|
||||
|
||||
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
|
||||
|
||||
/* Reuse the bio embedded into the btrfs_bio for the last mirror */
|
||||
if (dev_nr == bioc->num_stripes - 1) {
|
||||
bio = orig_bio;
|
||||
bio->bi_end_io = btrfs_orig_write_end_io;
|
||||
} else {
|
||||
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
|
||||
bio_inc_remaining(orig_bio);
|
||||
bio->bi_end_io = btrfs_clone_write_end_io;
|
||||
}
|
||||
|
||||
bio->bi_private = &bioc->stripes[dev_nr];
|
||||
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
|
||||
bioc->stripes[dev_nr].bioc = bioc;
|
||||
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
|
||||
}
|
||||
|
||||
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
|
||||
{
|
||||
u64 logical = bio->bi_iter.bi_sector << 9;
|
||||
u64 length = bio->bi_iter.bi_size;
|
||||
u64 map_length = length;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
struct btrfs_io_stripe smap;
|
||||
int ret;
|
||||
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
|
||||
&bioc, &smap, &mirror_num, 1);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
if (map_length < length) {
|
||||
btrfs_crit(fs_info,
|
||||
"mapping failed logical %llu bio len %llu len %llu",
|
||||
logical, length, map_length);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (!bioc) {
|
||||
/* Single mirror read/write fast path */
|
||||
btrfs_bio(bio)->mirror_num = mirror_num;
|
||||
btrfs_bio(bio)->device = smap.dev;
|
||||
bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
|
||||
bio->bi_private = fs_info;
|
||||
bio->bi_end_io = btrfs_simple_end_io;
|
||||
btrfs_submit_dev_bio(smap.dev, bio);
|
||||
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
/* Parity RAID write or read recovery */
|
||||
bio->bi_private = bioc;
|
||||
bio->bi_end_io = btrfs_raid56_end_io;
|
||||
if (bio_op(bio) == REQ_OP_READ)
|
||||
raid56_parity_recover(bio, bioc, mirror_num);
|
||||
else
|
||||
raid56_parity_write(bio, bioc);
|
||||
} else {
|
||||
/* Write to multiple mirrors */
|
||||
int total_devs = bioc->num_stripes;
|
||||
int dev_nr;
|
||||
|
||||
bioc->orig_bio = bio;
|
||||
for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
|
||||
btrfs_submit_mirrored_bio(bioc, dev_nr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit a repair write.
|
||||
*
|
||||
* This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
|
||||
* RAID setup. Here we only want to write the one bad copy, so we do the
|
||||
* mapping ourselves and submit the bio directly.
|
||||
*
|
||||
* The I/O is issued sychronously to block the repair read completion from
|
||||
* freeing the bio.
|
||||
*/
|
||||
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num)
|
||||
{
|
||||
struct btrfs_device *dev;
|
||||
struct bio_vec bvec;
|
||||
struct bio bio;
|
||||
u64 map_length = 0;
|
||||
u64 sector;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
|
||||
BUG_ON(!mirror_num);
|
||||
|
||||
if (btrfs_repair_one_zone(fs_info, logical))
|
||||
return 0;
|
||||
|
||||
map_length = length;
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bioc has devices
|
||||
* associated to its stripes that don't go away while we are doing the
|
||||
* read repair operation.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
if (btrfs_is_parity_mirror(fs_info, logical, length)) {
|
||||
/*
|
||||
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
|
||||
* to update all raid stripes, but here we just want to correct
|
||||
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
|
||||
* stripe's dev and sector.
|
||||
*/
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
|
||||
&map_length, &bioc, 0);
|
||||
if (ret)
|
||||
goto out_counter_dec;
|
||||
ASSERT(bioc->mirror_num == 1);
|
||||
} else {
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
|
||||
&map_length, &bioc, mirror_num);
|
||||
if (ret)
|
||||
goto out_counter_dec;
|
||||
BUG_ON(mirror_num != bioc->mirror_num);
|
||||
}
|
||||
|
||||
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
|
||||
dev = bioc->stripes[bioc->mirror_num - 1].dev;
|
||||
btrfs_put_bioc(bioc);
|
||||
|
||||
if (!dev || !dev->bdev ||
|
||||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
|
||||
ret = -EIO;
|
||||
goto out_counter_dec;
|
||||
}
|
||||
|
||||
bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
|
||||
bio.bi_iter.bi_sector = sector;
|
||||
__bio_add_page(&bio, page, length, pg_offset);
|
||||
|
||||
btrfsic_check_bio(&bio);
|
||||
ret = submit_bio_wait(&bio);
|
||||
if (ret) {
|
||||
/* try to remap that extent elsewhere? */
|
||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
|
||||
goto out_bio_uninit;
|
||||
}
|
||||
|
||||
btrfs_info_rl_in_rcu(fs_info,
|
||||
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
|
||||
ino, start, btrfs_dev_name(dev), sector);
|
||||
ret = 0;
|
||||
|
||||
out_bio_uninit:
|
||||
bio_uninit(&bio);
|
||||
out_counter_dec:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Initialize the bio set that backs all btrfs_bio allocations.  The front
 * padding is sized so that the btrfs_bio fields preceding the embedded bio
 * are allocated together with each bio.
 *
 * Returns 0 on success and -ENOMEM if bioset_init() fails.
 */
int __init btrfs_bioset_init(void)
{
	int err;

	err = bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			  offsetof(struct btrfs_bio, bio), BIOSET_NEED_BVECS);

	return err ? -ENOMEM : 0;
}
|
||||
|
||||
/* Release btrfs_bioset, the counterpart of btrfs_bioset_init(). */
void __cold btrfs_bioset_exit(void)
{
	bioset_exit(&btrfs_bioset);
}
|
|
@ -0,0 +1,127 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
* Copyright (C) 2022 Christoph Hellwig.
|
||||
*/
|
||||
|
||||
#ifndef BTRFS_BIO_H
|
||||
#define BTRFS_BIO_H
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include "tree-checker.h"
|
||||
|
||||
struct btrfs_bio;
|
||||
struct btrfs_fs_info;
|
||||
|
||||
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
|
||||
|
||||
/*
|
||||
* Maximum number of sectors for a single bio to limit the size of the
|
||||
* checksum array. This matches the number of bio_vecs per bio and thus the
|
||||
* I/O size for buffered I/O.
|
||||
*/
|
||||
#define BTRFS_MAX_BIO_SECTORS (256)
|
||||
|
||||
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
|
||||
|
||||
/*
|
||||
* Additional info to pass along bio.
|
||||
*
|
||||
* Mostly for btrfs specific features like csum and mirror_num.
|
||||
*/
|
||||
struct btrfs_bio {
|
||||
unsigned int mirror_num:7;
|
||||
|
||||
/*
|
||||
* Extra indicator for metadata bios.
|
||||
* For some btrfs bios they use pages without a mapping, thus
|
||||
* we can not rely on page->mapping->host to determine if
|
||||
* it's a metadata bio.
|
||||
*/
|
||||
unsigned int is_metadata:1;
|
||||
struct bvec_iter iter;
|
||||
|
||||
/* for direct I/O */
|
||||
u64 file_offset;
|
||||
|
||||
/* @device is for stripe IO submission. */
|
||||
struct btrfs_device *device;
|
||||
union {
|
||||
/* For data checksum verification. */
|
||||
struct {
|
||||
u8 *csum;
|
||||
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
||||
};
|
||||
|
||||
/* For metadata parentness verification. */
|
||||
struct btrfs_tree_parent_check parent_check;
|
||||
};
|
||||
|
||||
/* End I/O information supplied to btrfs_bio_alloc */
|
||||
btrfs_bio_end_io_t end_io;
|
||||
void *private;
|
||||
|
||||
/* For read end I/O handling */
|
||||
struct work_struct end_io_work;
|
||||
|
||||
/*
|
||||
* This member must come last, bio_alloc_bioset will allocate enough
|
||||
* bytes for entire btrfs_bio but relies on bio being last.
|
||||
*/
|
||||
struct bio bio;
|
||||
};
|
||||
|
||||
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
|
||||
{
|
||||
return container_of(bio, struct btrfs_bio, bio);
|
||||
}
|
||||
|
||||
int __init btrfs_bioset_init(void);
|
||||
void __cold btrfs_bioset_exit(void);
|
||||
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
|
||||
btrfs_bio_end_io_t end_io, void *private);
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
|
||||
btrfs_bio_end_io_t end_io, void *private);
|
||||
|
||||
|
||||
/*
 * Complete @bbio: store @status in the embedded bio, then call the end_io
 * handler recorded in @bbio.
 */
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
	struct bio *bio = &bbio->bio;

	bio->bi_status = status;
	bbio->end_io(bbio);
}
|
||||
|
||||
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
|
||||
{
|
||||
if (bbio->is_metadata)
|
||||
return;
|
||||
if (bbio->csum != bbio->csum_inline) {
|
||||
kfree(bbio->csum);
|
||||
bbio->csum = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Iterate through a btrfs_bio (@bbio) on a per-sector basis.
 *
 * fs_info	- pointer whose ->sectorsize gives the step size
 * bvl		- struct bio_vec, set to the current vector on every round
 * bbio		- struct btrfs_bio *
 * _iter	- struct bvec_iter, the cursor; starts as a copy of bbio->iter
 * bio_offset	- unsigned int, starts at 0 and grows by the sector size
 *
 * The cursor parameter is deliberately not called "iter": a parameter of
 * that name would also be substituted into the "(bbio)->iter" member access
 * and the macro would then only work for a caller variable named "iter".
 * Every argument is parenthesized at each use so expressions can be passed.
 */
#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, _iter, bio_offset)	\
	for ((_iter) = (bbio)->iter, (bio_offset) = 0;				\
	     (_iter).bi_size &&							\
	     (((bvl) = bio_iter_iovec((&(bbio)->bio), (_iter))), 1);		\
	     (bio_offset) += (fs_info)->sectorsize,				\
	     bio_advance_iter_single(&(bbio)->bio, &(_iter),			\
				     (fs_info)->sectorsize))
|
||||
|
||||
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
int mirror_num);
|
||||
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num);
|
||||
|
||||
#endif
|
|
@ -17,6 +17,21 @@
|
|||
#include "discard.h"
|
||||
#include "raid56.h"
|
||||
#include "zoned.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
|
||||
return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
|
||||
(btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return target flags in extended format or 0 if restripe for this chunk_type
|
||||
|
@ -284,7 +299,7 @@ struct btrfs_block_group *btrfs_next_block_group(
|
|||
return cache;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Check if we can do a NOCOW write for a given extent.
|
||||
*
|
||||
* @fs_info: The filesystem information object.
|
||||
|
@ -325,11 +340,9 @@ struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
|
|||
return bg;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Decrement the number of NOCOW writers in a block group.
|
||||
*
|
||||
* @bg: The block group.
|
||||
*
|
||||
* This is meant to be called after a previous call to btrfs_inc_nocow_writers(),
|
||||
* and on the block group returned by that call. Typically this is called after
|
||||
* creating an ordered extent for a NOCOW write, to prevent races with scrub and
|
||||
|
@ -1527,6 +1540,30 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
 * Decide whether @bg should be reclaimed after @bytes_freed were released.
 *
 * The limit is derived with mult_perc() from the block group length and the
 * space info's bg_reclaim_threshold; a threshold of 0 disables reclaim.
 * Reclaim is wanted only when the usage was at or above the limit before the
 * free and is below it now.
 */
static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
{
	const struct btrfs_space_info *sinfo = bg->space_info;
	const int percent = READ_ONCE(sinfo->bg_reclaim_threshold);
	const u64 used_now = bg->used;
	const u64 used_before = used_now + bytes_freed;
	u64 limit;

	if (percent == 0)
		return false;

	limit = mult_perc(bg->length, percent);

	/*
	 * Usage below the limit before the free most likely means a brand new
	 * block group, which we do not want to relocate.  Usage still at or
	 * above the limit means the block group is not a candidate yet.
	 */
	return used_before >= limit && used_now < limit;
}
|
||||
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info =
|
||||
|
@ -1594,6 +1631,40 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
|||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
if (bg->used == 0) {
|
||||
/*
|
||||
* It is possible that we trigger relocation on a block
|
||||
* group as its extents are deleted and it first goes
|
||||
* below the threshold, then shortly after goes empty.
|
||||
*
|
||||
* In this case, relocating it does delete it, but has
|
||||
* some overhead in relocation specific metadata, looking
|
||||
* for the non-existent extents and running some extra
|
||||
* transactions, which we can avoid by using one of the
|
||||
* other mechanisms for dealing with empty block groups.
|
||||
*/
|
||||
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
|
||||
btrfs_mark_bg_unused(bg);
|
||||
spin_unlock(&bg->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
|
||||
}
|
||||
/*
|
||||
* The block group might no longer meet the reclaim condition by
|
||||
* the time we get around to reclaiming it, so to avoid
|
||||
* reclaiming overly full block_groups, skip reclaiming them.
|
||||
*
|
||||
* Since the decision making process also depends on the amount
|
||||
* being freed, pass in a fake giant value to skip that extra
|
||||
* check, which is more meaningful when adding to the list in
|
||||
* the first place.
|
||||
*/
|
||||
if (!should_reclaim_block_group(bg, bg->length)) {
|
||||
spin_unlock(&bg->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
spin_unlock(&bg->lock);
|
||||
|
||||
/* Get out fast, in case we're unmounting the filesystem */
|
||||
|
@ -1740,8 +1811,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
|
|||
write_sequnlock(&fs_info->profiles_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Map a physical disk address to a list of logical addresses
|
||||
/*
|
||||
* Map a physical disk address to a list of logical addresses.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @chunk_start: logical address of block group
|
||||
|
@ -2001,6 +2072,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
|||
|
||||
cache->length = key->offset;
|
||||
cache->used = btrfs_stack_block_group_used(bgi);
|
||||
cache->commit_used = cache->used;
|
||||
cache->flags = btrfs_stack_block_group_flags(bgi);
|
||||
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
|
||||
|
||||
|
@ -2481,7 +2553,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
|||
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
|
||||
|
||||
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
|
||||
cache->needs_free_space = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags);
|
||||
|
||||
ret = btrfs_load_block_group_zone_info(cache, true);
|
||||
if (ret) {
|
||||
|
@ -2692,6 +2764,25 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
|
|||
struct extent_buffer *leaf;
|
||||
struct btrfs_block_group_item bgi;
|
||||
struct btrfs_key key;
|
||||
u64 old_commit_used;
|
||||
u64 used;
|
||||
|
||||
/*
|
||||
* Block group items update can be triggered out of commit transaction
|
||||
* critical section, thus we need a consistent view of used bytes.
|
||||
* We cannot use cache->used directly outside of the spin lock, as it
|
||||
* may be changed.
|
||||
*/
|
||||
spin_lock(&cache->lock);
|
||||
old_commit_used = cache->commit_used;
|
||||
used = cache->used;
|
||||
/* No change in used bytes, can safely skip it. */
|
||||
if (cache->commit_used == used) {
|
||||
spin_unlock(&cache->lock);
|
||||
return 0;
|
||||
}
|
||||
cache->commit_used = used;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
key.objectid = cache->start;
|
||||
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
||||
|
@ -2706,7 +2797,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
|
|||
|
||||
leaf = path->nodes[0];
|
||||
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
btrfs_set_stack_block_group_used(&bgi, cache->used);
|
||||
btrfs_set_stack_block_group_used(&bgi, used);
|
||||
btrfs_set_stack_block_group_chunk_objectid(&bgi,
|
||||
cache->global_root_id);
|
||||
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
|
||||
|
@ -2714,6 +2805,12 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
|
|||
btrfs_mark_buffer_dirty(leaf);
|
||||
fail:
|
||||
btrfs_release_path(path);
|
||||
/* We didn't update the block group item, need to revert @commit_used. */
|
||||
if (ret < 0) {
|
||||
spin_lock(&cache->lock);
|
||||
cache->commit_used = old_commit_used;
|
||||
spin_unlock(&cache->lock);
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
@ -3211,31 +3308,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline bool should_reclaim_block_group(struct btrfs_block_group *bg,
|
||||
u64 bytes_freed)
|
||||
{
|
||||
const struct btrfs_space_info *space_info = bg->space_info;
|
||||
const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
|
||||
const u64 new_val = bg->used;
|
||||
const u64 old_val = new_val + bytes_freed;
|
||||
u64 thresh;
|
||||
|
||||
if (reclaim_thresh == 0)
|
||||
return false;
|
||||
|
||||
thresh = div_factor_fine(bg->length, reclaim_thresh);
|
||||
|
||||
/*
|
||||
* If we were below the threshold before don't reclaim, we are likely a
|
||||
* brand new block group and we don't want to relocate new block groups.
|
||||
*/
|
||||
if (old_val < thresh)
|
||||
return false;
|
||||
if (new_val >= thresh)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, bool alloc)
|
||||
{
|
||||
|
@ -3347,8 +3419,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_add_reserved_bytes - update the block_group and space info counters
|
||||
/*
|
||||
* Update the block_group and space info counters.
|
||||
*
|
||||
* @cache: The cache we are manipulating
|
||||
* @ram_bytes: The number of bytes of file content, which is the same as
|
||||
* @num_bytes except for the compress path.
|
||||
|
@ -3391,8 +3464,9 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_free_reserved_bytes - update the block_group and space info counters
|
||||
/*
|
||||
* Update the block_group and space info counters.
|
||||
*
|
||||
* @cache: The cache we are manipulating
|
||||
* @num_bytes: The number of bytes in question
|
||||
* @delalloc: The blocks are allocated for the delalloc write
|
||||
|
@ -3449,13 +3523,13 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
|
|||
*/
|
||||
if (force == CHUNK_ALLOC_LIMITED) {
|
||||
thresh = btrfs_super_total_bytes(fs_info->super_copy);
|
||||
thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
|
||||
thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));
|
||||
|
||||
if (sinfo->total_bytes - bytes_used < thresh)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
|
||||
if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -55,6 +55,10 @@ enum btrfs_block_group_flags {
|
|||
BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
|
||||
BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
/* Does the block group need to be added to the free space tree? */
|
||||
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
|
||||
/* Indicate that the block group is placed on a sequential zone */
|
||||
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
|
||||
};
|
||||
|
||||
enum btrfs_caching_type {
|
||||
|
@ -99,6 +103,12 @@ struct btrfs_block_group {
|
|||
u64 cache_generation;
|
||||
u64 global_root_id;
|
||||
|
||||
/*
|
||||
* The last committed used bytes of this block group, if the above @used
|
||||
* is still the same as @commit_used, we don't need to update block
|
||||
* group item of this block group.
|
||||
*/
|
||||
u64 commit_used;
|
||||
/*
|
||||
* If the free space extent count exceeds this number, convert the block
|
||||
* group to bitmaps.
|
||||
|
@ -202,15 +212,6 @@ struct btrfs_block_group {
|
|||
/* Lock for free space tree operations. */
|
||||
struct mutex free_space_lock;
|
||||
|
||||
/*
|
||||
* Does the block group need to be added to the free space tree?
|
||||
* Protected by free_space_lock.
|
||||
*/
|
||||
int needs_free_space;
|
||||
|
||||
/* Flag indicating this block group is placed on a sequential zone */
|
||||
bool seq_zone;
|
||||
|
||||
/*
|
||||
* Number of extents in this block group used for swap files.
|
||||
* All accesses protected by the spinlock 'lock'.
|
||||
|
@ -251,16 +252,7 @@ static inline bool btrfs_is_block_group_data_only(
|
|||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static inline int btrfs_should_fragment_free_space(
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
|
||||
return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
|
||||
(btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
|
||||
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
|
||||
}
|
||||
int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group);
|
||||
#endif
|
||||
|
||||
struct btrfs_block_group *btrfs_lookup_first_block_group(
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#include "transaction.h"
|
||||
#include "block-group.h"
|
||||
#include "disk-io.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
|
||||
/*
|
||||
* HOW DO BLOCK RESERVES WORK
|
||||
|
@ -225,7 +227,7 @@ int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_percent)
|
||||
{
|
||||
u64 num_bytes = 0;
|
||||
int ret = -ENOSPC;
|
||||
|
@ -234,7 +236,7 @@ int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
|
|||
return 0;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
num_bytes = div_factor(block_rsv->size, min_factor);
|
||||
num_bytes = mult_perc(block_rsv->size, min_percent);
|
||||
if (block_rsv->reserved >= num_bytes)
|
||||
ret = 0;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
@ -323,31 +325,6 @@ void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
|||
spin_unlock(&block_rsv->lock);
|
||||
}
|
||||
|
||||
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
int min_factor)
|
||||
{
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
u64 min_bytes;
|
||||
|
||||
if (global_rsv->space_info != dest->space_info)
|
||||
return -ENOSPC;
|
||||
|
||||
spin_lock(&global_rsv->lock);
|
||||
min_bytes = div_factor(global_rsv->size, min_factor);
|
||||
if (global_rsv->reserved < min_bytes + num_bytes) {
|
||||
spin_unlock(&global_rsv->lock);
|
||||
return -ENOSPC;
|
||||
}
|
||||
global_rsv->reserved -= num_bytes;
|
||||
if (global_rsv->reserved < global_rsv->size)
|
||||
global_rsv->full = false;
|
||||
spin_unlock(&global_rsv->lock);
|
||||
|
||||
btrfs_block_rsv_add_bytes(dest, num_bytes, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
|
||||
|
@ -552,5 +529,17 @@ try_reserve:
|
|||
if (!ret)
|
||||
return global_rsv;
|
||||
}
|
||||
|
||||
/*
|
||||
* All hope is lost, but of course our reservations are overly
|
||||
* pessimistic, so instead of possibly having an ENOSPC abort here, try
|
||||
* one last time to force a reservation if there's enough actual space
|
||||
* on disk to make the reservation.
|
||||
*/
|
||||
ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize,
|
||||
BTRFS_RESERVE_FLUSH_EMERGENCY);
|
||||
if (!ret)
|
||||
return block_rsv;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#define BTRFS_BLOCK_RSV_H
|
||||
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_root;
|
||||
enum btrfs_reserve_flush_enum;
|
||||
|
||||
/*
|
||||
|
@ -62,7 +63,7 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
|||
int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_percent);
|
||||
int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
|
@ -70,9 +71,6 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
|
|||
struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
|
||||
bool update_size);
|
||||
int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes);
|
||||
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
int min_factor);
|
||||
void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, bool update_size);
|
||||
u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
|
|
|
@ -411,29 +411,142 @@ static inline void btrfs_inode_split_flags(u64 inode_item_flags,
|
|||
#define CSUM_FMT "0x%*phN"
|
||||
#define CSUM_FMT_VALUE(size, bytes) size, bytes
|
||||
|
||||
static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
|
||||
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
const u32 csum_size = root->fs_info->csum_size;
|
||||
void btrfs_submit_data_write_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
|
||||
void btrfs_submit_data_read_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
int mirror_num, enum btrfs_compression_type compress_type);
|
||||
void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
|
||||
blk_status_t btrfs_submit_bio_start(struct btrfs_inode *inode, struct bio *bio);
|
||||
blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
|
||||
struct bio *bio,
|
||||
u64 dio_file_offset);
|
||||
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
|
||||
u32 pgoff, u8 *csum, const u8 * const csum_expected);
|
||||
int btrfs_check_data_csum(struct btrfs_inode *inode, struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page, u32 pgoff);
|
||||
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page,
|
||||
u64 start, u64 end);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
u64 *orig_start, u64 *orig_block_len,
|
||||
u64 *ram_bytes, bool nowait, bool strict);
|
||||
|
||||
/* Output minus objectid, which is more meaningful */
|
||||
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
|
||||
btrfs_warn_rl(root->fs_info,
|
||||
"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
|
||||
root->root_key.objectid, btrfs_ino(inode),
|
||||
logical_start,
|
||||
CSUM_FMT_VALUE(csum_size, csum),
|
||||
CSUM_FMT_VALUE(csum_size, csum_expected),
|
||||
mirror_num);
|
||||
else
|
||||
btrfs_warn_rl(root->fs_info,
|
||||
"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
|
||||
root->root_key.objectid, btrfs_ino(inode),
|
||||
logical_start,
|
||||
CSUM_FMT_VALUE(csum_size, csum),
|
||||
CSUM_FMT_VALUE(csum_size, csum_expected),
|
||||
mirror_num);
|
||||
}
|
||||
void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode);
|
||||
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
|
||||
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
|
||||
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir, struct btrfs_inode *inode,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_add_link(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
|
||||
const struct fscrypt_str *name, int add_backref, u64 index);
|
||||
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry);
|
||||
int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
|
||||
int front);
|
||||
|
||||
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
|
||||
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
|
||||
bool in_reclaim_context);
|
||||
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
unsigned int extra_bits,
|
||||
struct extent_state **cached_state);
|
||||
|
||||
struct btrfs_new_inode_args {
|
||||
/* Input */
|
||||
struct inode *dir;
|
||||
struct dentry *dentry;
|
||||
struct inode *inode;
|
||||
bool orphan;
|
||||
bool subvol;
|
||||
|
||||
/* Output from btrfs_new_inode_prepare(), input to btrfs_create_new_inode(). */
|
||||
struct posix_acl *default_acl;
|
||||
struct posix_acl *acl;
|
||||
struct fscrypt_name fname;
|
||||
};
|
||||
|
||||
int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
|
||||
unsigned int *trans_num_items);
|
||||
int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_new_inode_args *args);
|
||||
void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
|
||||
struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
|
||||
struct inode *dir);
|
||||
void btrfs_set_delalloc_extent(struct btrfs_inode *inode, struct extent_state *state,
|
||||
u32 bits);
|
||||
void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
|
||||
struct extent_state *state, u32 bits);
|
||||
void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state *new,
|
||||
struct extent_state *other);
|
||||
void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
|
||||
struct extent_state *orig, u64 split);
|
||||
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
|
||||
void btrfs_evict_inode(struct inode *inode);
|
||||
struct inode *btrfs_alloc_inode(struct super_block *sb);
|
||||
void btrfs_destroy_inode(struct inode *inode);
|
||||
void btrfs_free_inode(struct inode *inode);
|
||||
int btrfs_drop_inode(struct inode *inode);
|
||||
int __init btrfs_init_cachep(void);
|
||||
void __cold btrfs_destroy_cachep(void);
|
||||
struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
|
||||
struct btrfs_root *root, struct btrfs_path *path);
|
||||
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
|
||||
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 end);
|
||||
int btrfs_update_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode);
|
||||
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode);
|
||||
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct btrfs_inode *inode);
|
||||
int btrfs_orphan_cleanup(struct btrfs_root *root);
|
||||
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size);
|
||||
void btrfs_add_delayed_iput(struct btrfs_inode *inode);
|
||||
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
u64 start, u64 num_bytes, u64 min_size,
|
||||
loff_t actual_len, u64 *alloc_hint);
|
||||
int btrfs_prealloc_file_range_trans(struct inode *inode,
|
||||
struct btrfs_trans_handle *trans, int mode,
|
||||
u64 start, u64 num_bytes, u64 min_size,
|
||||
loff_t actual_len, u64 *alloc_hint);
|
||||
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
|
||||
u64 start, u64 end, int *page_started,
|
||||
unsigned long *nr_written, struct writeback_control *wbc);
|
||||
int btrfs_writepage_cow_fixup(struct page *page);
|
||||
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start,
|
||||
u64 end, bool uptodate);
|
||||
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
|
||||
int compress_type);
|
||||
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
|
||||
u64 file_offset, u64 disk_bytenr,
|
||||
u64 disk_io_size,
|
||||
struct page **pages);
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
size_t done_before);
|
||||
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
|
||||
size_t done_before);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
|
||||
/* Inode locking type flags, by default the exclusive lock is taken. */
|
||||
enum btrfs_ilock_type {
|
||||
ENUM_BIT(BTRFS_ILOCK_SHARED),
|
||||
ENUM_BIT(BTRFS_ILOCK_TRY),
|
||||
ENUM_BIT(BTRFS_ILOCK_MMAP),
|
||||
};
|
||||
|
||||
int btrfs_inode_lock(struct btrfs_inode *inode, unsigned int ilock_flags);
|
||||
void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags);
|
||||
void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes,
|
||||
const u64 del_bytes);
|
||||
void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -82,6 +82,7 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/string.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
@ -92,6 +93,7 @@
|
|||
#include "check-integrity.h"
|
||||
#include "rcu-string.h"
|
||||
#include "compression.h"
|
||||
#include "accessors.h"
|
||||
|
||||
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
|
||||
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
|
||||
|
@ -755,7 +757,7 @@ static int btrfsic_process_superblock_dev_mirror(
|
|||
btrfs_info_in_rcu(fs_info,
|
||||
"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
|
||||
superblock_bdev,
|
||||
rcu_str_deref(device->name), dev_bytenr,
|
||||
btrfs_dev_name(device), dev_bytenr,
|
||||
dev_state->bdev, dev_bytenr,
|
||||
superblock_mirror_num);
|
||||
list_add(&superblock_tmp->all_blocks_node,
|
||||
|
|
|
@ -23,16 +23,19 @@
|
|||
#include <crypto/hash.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "volumes.h"
|
||||
#include "bio.h"
|
||||
#include "ordered-data.h"
|
||||
#include "compression.h"
|
||||
#include "extent_io.h"
|
||||
#include "extent_map.h"
|
||||
#include "subpage.h"
|
||||
#include "zoned.h"
|
||||
#include "file-item.h"
|
||||
#include "super.h"
|
||||
|
||||
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
|
||||
|
||||
|
@ -116,7 +119,7 @@ static int compression_decompress_bio(struct list_head *ws,
|
|||
}
|
||||
|
||||
static int compression_decompress(int type, struct list_head *ws,
|
||||
unsigned char *data_in, struct page *dest_page,
|
||||
const u8 *data_in, struct page *dest_page,
|
||||
unsigned long start_byte, size_t srclen, size_t destlen)
|
||||
{
|
||||
switch (type) {
|
||||
|
@ -183,7 +186,7 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio)
|
|||
u64 start = bbio->file_offset + offset;
|
||||
|
||||
if (!status &&
|
||||
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
|
||||
(!csum || !btrfs_check_data_csum(bi, bbio, offset,
|
||||
bv.bv_page, bv.bv_offset))) {
|
||||
btrfs_clean_io_failure(bi, start, bv.bv_page,
|
||||
bv.bv_offset);
|
||||
|
@ -191,9 +194,9 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio)
|
|||
int ret;
|
||||
|
||||
refcount_inc(&cb->pending_ios);
|
||||
ret = btrfs_repair_one_sector(inode, bbio, offset,
|
||||
ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
|
||||
bv.bv_page, bv.bv_offset,
|
||||
btrfs_submit_data_read_bio);
|
||||
true);
|
||||
if (ret) {
|
||||
refcount_dec(&cb->pending_ios);
|
||||
status = errno_to_blk_status(ret);
|
||||
|
@ -1229,7 +1232,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
|
|||
* single page, and we want to read a single page out of it.
|
||||
* start_byte tells us the offset into the compressed data we're interested in
|
||||
*/
|
||||
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
|
||||
unsigned long start_byte, size_t srclen, size_t destlen)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
|
@ -1243,12 +1246,13 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
|||
return ret;
|
||||
}
|
||||
|
||||
void __init btrfs_init_compress(void)
|
||||
int __init btrfs_init_compress(void)
|
||||
{
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
|
||||
zstd_init_workspace_manager();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cold btrfs_exit_compress(void)
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#ifndef BTRFS_COMPRESSION_H
|
||||
#define BTRFS_COMPRESSION_H
|
||||
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
struct btrfs_inode;
|
||||
|
@ -77,7 +78,7 @@ static inline unsigned int btrfs_compress_level(unsigned int type_level)
|
|||
return ((type_level & 0xF0) >> 4);
|
||||
}
|
||||
|
||||
void __init btrfs_init_compress(void);
|
||||
int __init btrfs_init_compress(void);
|
||||
void __cold btrfs_exit_compress(void);
|
||||
|
||||
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
||||
|
@ -85,7 +86,7 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
|||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out);
|
||||
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
|
||||
unsigned long start_byte, size_t srclen, size_t destlen);
|
||||
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
|
||||
struct compressed_bio *cb, u32 decompressed);
|
||||
|
@ -149,7 +150,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|||
u64 start, struct page **pages, unsigned long *out_pages,
|
||||
unsigned long *total_in, unsigned long *total_out);
|
||||
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
||||
int zlib_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
int zlib_decompress(struct list_head *ws, const u8 *data_in,
|
||||
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
||||
size_t destlen);
|
||||
struct list_head *zlib_alloc_workspace(unsigned int level);
|
||||
|
@ -160,7 +161,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|||
u64 start, struct page **pages, unsigned long *out_pages,
|
||||
unsigned long *total_in, unsigned long *total_out);
|
||||
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
||||
int lzo_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
int lzo_decompress(struct list_head *ws, const u8 *data_in,
|
||||
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
||||
size_t destlen);
|
||||
struct list_head *lzo_alloc_workspace(unsigned int level);
|
||||
|
@ -170,7 +171,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|||
u64 start, struct page **pages, unsigned long *out_pages,
|
||||
unsigned long *total_in, unsigned long *total_out);
|
||||
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
||||
int zstd_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
int zstd_decompress(struct list_head *ws, const u8 *data_in,
|
||||
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
||||
size_t destlen);
|
||||
void zstd_init_workspace_manager(void);
|
||||
|
|
301
fs/btrfs/ctree.c
301
fs/btrfs/ctree.c
|
@ -8,6 +8,7 @@
|
|||
#include <linux/rbtree.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
|
@ -17,6 +18,13 @@
|
|||
#include "qgroup.h"
|
||||
#include "tree-mod-log.h"
|
||||
#include "tree-checker.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "relocation.h"
|
||||
#include "file-item.h"
|
||||
|
||||
static struct kmem_cache *btrfs_path_cachep;
|
||||
|
||||
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_path *path, int level);
|
||||
|
@ -44,6 +52,104 @@ static const struct btrfs_csums {
|
|||
.driver = "blake2b-256" },
|
||||
};
|
||||
|
||||
/*
|
||||
* The leaf data grows from end-to-front in the node. this returns the address
|
||||
* of the start of the last item, which is the stop of the leaf data stack.
|
||||
*/
|
||||
static unsigned int leaf_data_end(const struct extent_buffer *leaf)
|
||||
{
|
||||
u32 nr = btrfs_header_nritems(leaf);
|
||||
|
||||
if (nr == 0)
|
||||
return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
|
||||
return btrfs_item_offset(leaf, nr - 1);
|
||||
}
|
||||
|
||||
/*
 * Shift a range of item data inside a single @leaf.
 *
 * @leaf:	leaf that we're doing a memmove on
 * @dst_offset:	item data offset we're moving to
 * @src_offset:	item data offset we're moving from
 * @len:	length of the data we're moving
 *
 * Thin wrapper around memmove_extent_buffer(), so overlapping ranges are
 * fine.  Item data offsets are relative to the position right after the leaf
 * header (btrfs_item_nr_offset(leaf, 0)); that base is added here so callers
 * can pass plain item data offsets.
 */
static inline void memmove_leaf_data(const struct extent_buffer *leaf,
				     unsigned long dst_offset,
				     unsigned long src_offset,
				     unsigned long len)
{
	const unsigned long to = btrfs_item_nr_offset(leaf, 0) + dst_offset;
	const unsigned long from = btrfs_item_nr_offset(leaf, 0) + src_offset;

	memmove_extent_buffer(leaf, to, from, len);
}
|
||||
|
||||
/*
 * Copy a range of item data from leaf @src into leaf @dst.
 *
 * @dst:	destination leaf that we're copying into
 * @src:	source leaf that we're copying from
 * @dst_offset:	item data offset we're copying to
 * @src_offset:	item data offset we're copying from
 * @len:	length of the data we're copying
 *
 * Thin wrapper around copy_extent_buffer().  Item data offsets are relative
 * to the position right after the leaf header (btrfs_item_nr_offset(eb, 0));
 * that base is added for each leaf here so callers can pass plain item data
 * offsets.
 */
static inline void copy_leaf_data(const struct extent_buffer *dst,
				  const struct extent_buffer *src,
				  unsigned long dst_offset,
				  unsigned long src_offset, unsigned long len)
{
	const unsigned long to = btrfs_item_nr_offset(dst, 0) + dst_offset;
	const unsigned long from = btrfs_item_nr_offset(src, 0) + src_offset;

	copy_extent_buffer(dst, src, to, from, len);
}
|
||||
|
||||
/*
|
||||
* Move items in a @leaf (using memmove).
|
||||
*
|
||||
* @dst: destination leaf for the items
|
||||
* @dst_item: the item nr we're copying into
|
||||
* @src_item: the item nr we're copying from
|
||||
* @nr_items: the number of items to copy
|
||||
*
|
||||
* Wrapper around memmove_extent_buffer() that does the math to get the
|
||||
* appropriate offsets into the leaf from the item numbers.
|
||||
*/
|
||||
static inline void memmove_leaf_items(const struct extent_buffer *leaf,
|
||||
int dst_item, int src_item, int nr_items)
|
||||
{
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, dst_item),
|
||||
btrfs_item_nr_offset(leaf, src_item),
|
||||
nr_items * sizeof(struct btrfs_item));
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy items from @src into @dst at the given @offset.
|
||||
*
|
||||
* @dst: destination leaf for the items
|
||||
* @src: source leaf for the items
|
||||
* @dst_item: the item nr we're copying into
|
||||
* @src_item: the item nr we're copying from
|
||||
* @nr_items: the number of items to copy
|
||||
*
|
||||
* Wrapper around copy_extent_buffer() that does the math to get the
|
||||
* appropriate offsets into the leaf from the item numbers.
|
||||
*/
|
||||
static inline void copy_leaf_items(const struct extent_buffer *dst,
|
||||
const struct extent_buffer *src,
|
||||
int dst_item, int src_item, int nr_items)
|
||||
{
|
||||
copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, dst_item),
|
||||
btrfs_item_nr_offset(src, src_item),
|
||||
nr_items * sizeof(struct btrfs_item));
|
||||
}
|
||||
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s)
|
||||
{
|
||||
u16 t = btrfs_super_csum_type(s);
|
||||
|
@ -78,6 +184,8 @@ size_t __attribute_const__ btrfs_get_num_csums(void)
|
|||
|
||||
struct btrfs_path *btrfs_alloc_path(void)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
|
||||
}
|
||||
|
||||
|
@ -487,7 +595,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
|||
} else {
|
||||
WARN_ON(trans->transid != btrfs_header_generation(parent));
|
||||
btrfs_tree_mod_log_insert_key(parent, parent_slot,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
btrfs_set_node_blockptr(parent, parent_slot,
|
||||
cow->start);
|
||||
btrfs_set_node_ptr_generation(parent, parent_slot,
|
||||
|
@ -850,19 +958,22 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
|
|||
int slot)
|
||||
{
|
||||
int level = btrfs_header_level(parent);
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct extent_buffer *eb;
|
||||
struct btrfs_key first_key;
|
||||
|
||||
if (slot < 0 || slot >= btrfs_header_nritems(parent))
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
BUG_ON(level == 0);
|
||||
|
||||
btrfs_node_key_to_cpu(parent, &first_key, slot);
|
||||
check.level = level - 1;
|
||||
check.transid = btrfs_node_ptr_generation(parent, slot);
|
||||
check.owner_root = btrfs_header_owner(parent);
|
||||
check.has_first_key = true;
|
||||
btrfs_node_key_to_cpu(parent, &check.first_key, slot);
|
||||
|
||||
eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
|
||||
btrfs_header_owner(parent),
|
||||
btrfs_node_ptr_generation(parent, slot),
|
||||
level - 1, &first_key);
|
||||
&check);
|
||||
if (IS_ERR(eb))
|
||||
return eb;
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
|
@ -1016,7 +1127,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_disk_key right_key;
|
||||
btrfs_node_key(right, &right_key, 0);
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
BUG_ON(ret < 0);
|
||||
btrfs_set_node_key(parent, &right_key, pslot + 1);
|
||||
btrfs_mark_buffer_dirty(parent);
|
||||
|
@ -1062,7 +1173,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_disk_key mid_key;
|
||||
btrfs_node_key(mid, &mid_key, 0);
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
BUG_ON(ret < 0);
|
||||
btrfs_set_node_key(parent, &mid_key, pslot);
|
||||
btrfs_mark_buffer_dirty(parent);
|
||||
|
@ -1164,7 +1275,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
|||
orig_slot += left_nr;
|
||||
btrfs_node_key(mid, &disk_key, 0);
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
BUG_ON(ret < 0);
|
||||
btrfs_set_node_key(parent, &disk_key, pslot);
|
||||
btrfs_mark_buffer_dirty(parent);
|
||||
|
@ -1218,7 +1329,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
|||
|
||||
btrfs_node_key(right, &disk_key, 0);
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
BUG_ON(ret < 0);
|
||||
btrfs_set_node_key(parent, &disk_key, pslot + 1);
|
||||
btrfs_mark_buffer_dirty(parent);
|
||||
|
@ -1421,10 +1532,10 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
const struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
u64 blocknr;
|
||||
u64 gen;
|
||||
struct extent_buffer *tmp;
|
||||
struct btrfs_key first_key;
|
||||
int ret;
|
||||
int parent_level;
|
||||
bool unlock_up;
|
||||
|
@ -1433,7 +1544,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
blocknr = btrfs_node_blockptr(*eb_ret, slot);
|
||||
gen = btrfs_node_ptr_generation(*eb_ret, slot);
|
||||
parent_level = btrfs_header_level(*eb_ret);
|
||||
btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
|
||||
btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot);
|
||||
check.has_first_key = true;
|
||||
check.level = parent_level - 1;
|
||||
check.transid = gen;
|
||||
check.owner_root = root->root_key.objectid;
|
||||
|
||||
/*
|
||||
* If we need to read an extent buffer from disk and we are holding locks
|
||||
|
@ -1455,7 +1570,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
* parents (shared tree blocks).
|
||||
*/
|
||||
if (btrfs_verify_level_key(tmp,
|
||||
parent_level - 1, &first_key, gen)) {
|
||||
parent_level - 1, &check.first_key, gen)) {
|
||||
free_extent_buffer(tmp);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
@ -1472,7 +1587,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
btrfs_unlock_up_safe(p, level + 1);
|
||||
|
||||
/* now we're allowed to do a blocking uptodate check */
|
||||
ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
|
||||
ret = btrfs_read_extent_buffer(tmp, &check);
|
||||
if (ret) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
|
@ -1502,8 +1617,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
if (p->reada != READA_NONE)
|
||||
reada_for_search(fs_info, p, level, slot, key->objectid);
|
||||
|
||||
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
|
||||
gen, parent_level - 1, &first_key);
|
||||
tmp = read_tree_block(fs_info, blocknr, &check);
|
||||
if (IS_ERR(tmp)) {
|
||||
btrfs_release_path(p);
|
||||
return PTR_ERR(tmp);
|
||||
|
@ -1934,6 +2048,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
|||
int min_write_lock_level;
|
||||
int prev_cmp;
|
||||
|
||||
might_sleep();
|
||||
|
||||
lowest_level = p->lowest_level;
|
||||
WARN_ON(lowest_level && ins_len > 0);
|
||||
WARN_ON(p->nodes[0] != NULL);
|
||||
|
@ -2357,7 +2473,7 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Search for a valid slot for the given path.
|
||||
*
|
||||
* @root: The root node of the tree.
|
||||
|
@ -2416,7 +2532,7 @@ static void fixup_low_keys(struct btrfs_path *path,
|
|||
break;
|
||||
t = path->nodes[i];
|
||||
ret = btrfs_tree_mod_log_insert_key(t, tslot,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
BUG_ON(ret < 0);
|
||||
btrfs_set_node_key(t, key, tslot);
|
||||
btrfs_mark_buffer_dirty(path->nodes[i]);
|
||||
|
@ -2585,8 +2701,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
copy_extent_buffer(dst, src,
|
||||
btrfs_node_key_ptr_offset(dst_nritems),
|
||||
btrfs_node_key_ptr_offset(0),
|
||||
btrfs_node_key_ptr_offset(dst, dst_nritems),
|
||||
btrfs_node_key_ptr_offset(src, 0),
|
||||
push_items * sizeof(struct btrfs_key_ptr));
|
||||
|
||||
if (push_items < src_nritems) {
|
||||
|
@ -2594,8 +2710,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
|
|||
* Don't call btrfs_tree_mod_log_insert_move() here, key removal
|
||||
* was already fully logged by btrfs_tree_mod_log_eb_copy() above.
|
||||
*/
|
||||
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
|
||||
btrfs_node_key_ptr_offset(push_items),
|
||||
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0),
|
||||
btrfs_node_key_ptr_offset(src, push_items),
|
||||
(src_nritems - push_items) *
|
||||
sizeof(struct btrfs_key_ptr));
|
||||
}
|
||||
|
@ -2655,8 +2771,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
|
||||
BUG_ON(ret < 0);
|
||||
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
|
||||
btrfs_node_key_ptr_offset(0),
|
||||
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items),
|
||||
btrfs_node_key_ptr_offset(dst, 0),
|
||||
(dst_nritems) *
|
||||
sizeof(struct btrfs_key_ptr));
|
||||
|
||||
|
@ -2667,8 +2783,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
copy_extent_buffer(dst, src,
|
||||
btrfs_node_key_ptr_offset(0),
|
||||
btrfs_node_key_ptr_offset(src_nritems - push_items),
|
||||
btrfs_node_key_ptr_offset(dst, 0),
|
||||
btrfs_node_key_ptr_offset(src, src_nritems - push_items),
|
||||
push_items * sizeof(struct btrfs_key_ptr));
|
||||
|
||||
btrfs_set_header_nritems(src, src_nritems - push_items);
|
||||
|
@ -2771,13 +2887,13 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
|
|||
BUG_ON(ret < 0);
|
||||
}
|
||||
memmove_extent_buffer(lower,
|
||||
btrfs_node_key_ptr_offset(slot + 1),
|
||||
btrfs_node_key_ptr_offset(slot),
|
||||
btrfs_node_key_ptr_offset(lower, slot + 1),
|
||||
btrfs_node_key_ptr_offset(lower, slot),
|
||||
(nritems - slot) * sizeof(struct btrfs_key_ptr));
|
||||
}
|
||||
if (level) {
|
||||
ret = btrfs_tree_mod_log_insert_key(lower, slot,
|
||||
BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_ADD);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
btrfs_set_node_key(lower, key, slot);
|
||||
|
@ -2854,8 +2970,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
copy_extent_buffer(split, c,
|
||||
btrfs_node_key_ptr_offset(0),
|
||||
btrfs_node_key_ptr_offset(mid),
|
||||
btrfs_node_key_ptr_offset(split, 0),
|
||||
btrfs_node_key_ptr_offset(c, mid),
|
||||
(c_nritems - mid) * sizeof(struct btrfs_key_ptr));
|
||||
btrfs_set_header_nritems(split, c_nritems - mid);
|
||||
btrfs_set_header_nritems(c, mid);
|
||||
|
@ -2995,25 +3111,17 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
|
|||
|
||||
/* make room in the right data area */
|
||||
data_end = leaf_data_end(right);
|
||||
memmove_extent_buffer(right,
|
||||
BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
|
||||
BTRFS_LEAF_DATA_OFFSET + data_end,
|
||||
memmove_leaf_data(right, data_end - push_space, data_end,
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
|
||||
|
||||
/* copy from the left data area */
|
||||
copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
|
||||
BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
|
||||
push_space);
|
||||
copy_leaf_data(right, left, BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
|
||||
leaf_data_end(left), push_space);
|
||||
|
||||
memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
|
||||
btrfs_item_nr_offset(0),
|
||||
right_nritems * sizeof(struct btrfs_item));
|
||||
memmove_leaf_items(right, push_items, 0, right_nritems);
|
||||
|
||||
/* copy the items from left to right */
|
||||
copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
|
||||
btrfs_item_nr_offset(left_nritems - push_items),
|
||||
push_items * sizeof(struct btrfs_item));
|
||||
copy_leaf_items(right, left, 0, left_nritems - push_items, push_items);
|
||||
|
||||
/* update the item pointers */
|
||||
btrfs_init_map_token(&token, right);
|
||||
|
@ -3205,19 +3313,13 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
|
|||
WARN_ON(!empty && push_items == btrfs_header_nritems(right));
|
||||
|
||||
/* push data from right to left */
|
||||
copy_extent_buffer(left, right,
|
||||
btrfs_item_nr_offset(btrfs_header_nritems(left)),
|
||||
btrfs_item_nr_offset(0),
|
||||
push_items * sizeof(struct btrfs_item));
|
||||
copy_leaf_items(left, right, btrfs_header_nritems(left), 0, push_items);
|
||||
|
||||
push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
|
||||
btrfs_item_offset(right, push_items - 1);
|
||||
|
||||
copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
|
||||
leaf_data_end(left) - push_space,
|
||||
BTRFS_LEAF_DATA_OFFSET +
|
||||
btrfs_item_offset(right, push_items - 1),
|
||||
push_space);
|
||||
copy_leaf_data(left, right, leaf_data_end(left) - push_space,
|
||||
btrfs_item_offset(right, push_items - 1), push_space);
|
||||
old_left_nritems = btrfs_header_nritems(left);
|
||||
BUG_ON(old_left_nritems <= 0);
|
||||
|
||||
|
@ -3240,15 +3342,12 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
|
|||
if (push_items < right_nritems) {
|
||||
push_space = btrfs_item_offset(right, push_items - 1) -
|
||||
leaf_data_end(right);
|
||||
memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
|
||||
memmove_leaf_data(right,
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
|
||||
BTRFS_LEAF_DATA_OFFSET +
|
||||
leaf_data_end(right), push_space);
|
||||
|
||||
memmove_extent_buffer(right, btrfs_item_nr_offset(0),
|
||||
btrfs_item_nr_offset(push_items),
|
||||
(btrfs_header_nritems(right) - push_items) *
|
||||
sizeof(struct btrfs_item));
|
||||
memmove_leaf_items(right, 0, push_items,
|
||||
btrfs_header_nritems(right) - push_items);
|
||||
}
|
||||
|
||||
btrfs_init_map_token(&token, right);
|
||||
|
@ -3380,13 +3479,9 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_header_nritems(right, nritems);
|
||||
data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
|
||||
|
||||
copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
|
||||
btrfs_item_nr_offset(mid),
|
||||
nritems * sizeof(struct btrfs_item));
|
||||
copy_leaf_items(right, l, 0, mid, nritems);
|
||||
|
||||
copy_extent_buffer(right, l,
|
||||
BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
|
||||
data_copy_size, BTRFS_LEAF_DATA_OFFSET +
|
||||
copy_leaf_data(right, l, BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size,
|
||||
leaf_data_end(l), data_copy_size);
|
||||
|
||||
rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
|
||||
|
@ -3757,9 +3852,7 @@ static noinline int split_item(struct btrfs_path *path,
|
|||
nritems = btrfs_header_nritems(leaf);
|
||||
if (slot != nritems) {
|
||||
/* shift the items */
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
|
||||
btrfs_item_nr_offset(slot),
|
||||
(nritems - slot) * sizeof(struct btrfs_item));
|
||||
memmove_leaf_items(leaf, slot + 1, slot, nritems - slot);
|
||||
}
|
||||
|
||||
btrfs_cpu_key_to_disk(&disk_key, new_key);
|
||||
|
@ -3870,9 +3963,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
|
|||
|
||||
/* shift the data */
|
||||
if (from_end) {
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end, old_data_start + new_size - data_end);
|
||||
memmove_leaf_data(leaf, data_end + size_diff, data_end,
|
||||
old_data_start + new_size - data_end);
|
||||
} else {
|
||||
struct btrfs_disk_key disk_key;
|
||||
u64 offset;
|
||||
|
@ -3897,9 +3989,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
|
|||
}
|
||||
}
|
||||
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end, old_data_start - data_end);
|
||||
memmove_leaf_data(leaf, data_end + size_diff, data_end,
|
||||
old_data_start - data_end);
|
||||
|
||||
offset = btrfs_disk_key_offset(&disk_key);
|
||||
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
|
||||
|
@ -3964,9 +4055,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
|
|||
}
|
||||
|
||||
/* shift the data */
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end, old_data - data_end);
|
||||
memmove_leaf_data(leaf, data_end - data_size, data_end,
|
||||
old_data - data_end);
|
||||
|
||||
data_end = old_data;
|
||||
old_size = btrfs_item_size(leaf, slot);
|
||||
|
@ -3979,14 +4069,15 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* setup_items_for_insert - Helper called before inserting one or more items
|
||||
* to a leaf. Main purpose is to save stack depth by doing the bulk of the work
|
||||
* in a function that doesn't call btrfs_search_slot
|
||||
/*
|
||||
* Make space in the node before inserting one or more items.
|
||||
*
|
||||
* @root: root we are inserting items to
|
||||
* @path: points to the leaf/slot where we are going to insert new items
|
||||
* @batch: information about the batch of items to insert
|
||||
*
|
||||
* Main purpose is to save stack depth by doing the bulk of the work in a
|
||||
* function that doesn't call btrfs_search_slot
|
||||
*/
|
||||
static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
const struct btrfs_item_batch *batch)
|
||||
|
@ -4049,15 +4140,11 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p
|
|||
ioff - batch->total_data_size);
|
||||
}
|
||||
/* shift the items */
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
|
||||
btrfs_item_nr_offset(slot),
|
||||
(nritems - slot) * sizeof(struct btrfs_item));
|
||||
memmove_leaf_items(leaf, slot + batch->nr, slot, nritems - slot);
|
||||
|
||||
/* shift the data */
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end - batch->total_data_size,
|
||||
BTRFS_LEAF_DATA_OFFSET + data_end,
|
||||
old_data - data_end);
|
||||
memmove_leaf_data(leaf, data_end - batch->total_data_size,
|
||||
data_end, old_data - data_end);
|
||||
data_end = old_data;
|
||||
}
|
||||
|
||||
|
@ -4211,13 +4298,13 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
|
|||
BUG_ON(ret < 0);
|
||||
}
|
||||
memmove_extent_buffer(parent,
|
||||
btrfs_node_key_ptr_offset(slot),
|
||||
btrfs_node_key_ptr_offset(slot + 1),
|
||||
btrfs_node_key_ptr_offset(parent, slot),
|
||||
btrfs_node_key_ptr_offset(parent, slot + 1),
|
||||
sizeof(struct btrfs_key_ptr) *
|
||||
(nritems - slot - 1));
|
||||
} else if (level) {
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, slot,
|
||||
BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
|
||||
BTRFS_MOD_LOG_KEY_REMOVE);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
|
||||
|
@ -4292,9 +4379,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
|||
for (i = 0; i < nr; i++)
|
||||
dsize += btrfs_item_size(leaf, slot + i);
|
||||
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end + dsize,
|
||||
BTRFS_LEAF_DATA_OFFSET + data_end,
|
||||
memmove_leaf_data(leaf, data_end + dsize, data_end,
|
||||
last_off - data_end);
|
||||
|
||||
btrfs_init_map_token(&token, leaf);
|
||||
|
@ -4305,10 +4390,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
|||
btrfs_set_token_item_offset(&token, i, ioff + dsize);
|
||||
}
|
||||
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
|
||||
btrfs_item_nr_offset(slot + nr),
|
||||
sizeof(struct btrfs_item) *
|
||||
(nritems - slot - nr));
|
||||
memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr);
|
||||
}
|
||||
btrfs_set_header_nritems(leaf, nritems - nr);
|
||||
nritems -= nr;
|
||||
|
@ -4850,6 +4932,14 @@ done:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Advance @path to the next item, at the tree state given by @time_seq.
 *
 * The leaf slot is incremented first.  If it still points inside the current
 * leaf, 0 is returned; otherwise the walk continues in the following leaf and
 * the result of btrfs_next_old_leaf() is returned.
 */
int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq)
{
	path->slots[0]++;

	/* Still within the items of the current leaf, nothing more to do. */
	if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
		return 0;

	return btrfs_next_old_leaf(root, path, time_seq);
}
|
||||
|
||||
/*
|
||||
* this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
|
||||
* searching until it gets past min_objectid or finds an item of 'type'
|
||||
|
@ -4933,3 +5023,18 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
|
|||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Create the slab cache that backs struct btrfs_path allocations.
 *
 * Return 0 on success, -ENOMEM if the cache could not be created.
 */
int __init btrfs_ctree_init(void)
{
	btrfs_path_cachep = kmem_cache_create("btrfs_path",
					      sizeof(struct btrfs_path), 0,
					      SLAB_MEM_SPREAD, NULL);

	return btrfs_path_cachep ? 0 : -ENOMEM;
}
|
||||
|
||||
/* Destroy the slab cache used for struct btrfs_path allocations. */
void __cold btrfs_ctree_exit(void)
{
	kmem_cache_destroy(btrfs_path_cachep);
}
|
||||
|
|
3465
fs/btrfs/ctree.h
3465
fs/btrfs/ctree.h
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,22 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_DEFRAG_H
|
||||
#define BTRFS_DEFRAG_H
|
||||
|
||||
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
|
||||
struct btrfs_ioctl_defrag_range_args *range,
|
||||
u64 newer_than, unsigned long max_to_defrag);
|
||||
int __init btrfs_auto_defrag_init(void);
|
||||
void __cold btrfs_auto_defrag_exit(void);
|
||||
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, u32 extent_thresh);
|
||||
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root);
|
||||
|
||||
static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return signal_pending(current);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,5 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "block-rsv.h"
|
||||
|
@ -8,6 +9,7 @@
|
|||
#include "transaction.h"
|
||||
#include "qgroup.h"
|
||||
#include "block-group.h"
|
||||
#include "fs.h"
|
||||
|
||||
/*
|
||||
* HOW DOES THIS WORK
|
||||
|
@ -200,8 +202,8 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
|
|||
btrfs_qgroup_free_data(inode, reserved, start, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release any excessive reservation
|
||||
/*
|
||||
* Release any excessive reservations for an inode.
|
||||
*
|
||||
* @inode: the inode we need to release from
|
||||
* @qgroup_free: free or convert qgroup meta. Unlike normal operation, qgroup
|
||||
|
@ -375,8 +377,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a metadata reservation for an inode
|
||||
/*
|
||||
* Release a metadata reservation for an inode.
|
||||
*
|
||||
* @inode: the inode to release the reservation for.
|
||||
* @num_bytes: the number of bytes we are releasing.
|
||||
|
@ -403,8 +405,9 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
|||
btrfs_inode_rsv_release(inode, qgroup_free);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_delalloc_release_extents - release our outstanding_extents
|
||||
/*
|
||||
* Release our outstanding_extents for an inode.
|
||||
*
|
||||
* @inode: the inode to balance the reservation for.
|
||||
* @num_bytes: the number of bytes we originally reserved with
|
||||
*
|
||||
|
@ -431,9 +434,9 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
|
|||
btrfs_inode_rsv_release(inode, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_delalloc_reserve_space - reserve data and metadata space for
|
||||
* delalloc
|
||||
/*
|
||||
* Reserve data and metadata space for delalloc
|
||||
*
|
||||
* @inode: inode we're writing to
|
||||
* @start: start range we are writing to
|
||||
* @len: how long the range we are writing to
|
||||
|
@ -442,19 +445,19 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
|
|||
*
|
||||
* This will do the following things
|
||||
*
|
||||
* - reserve space in data space info for num bytes
|
||||
* and reserve precious corresponding qgroup space
|
||||
* - reserve space in data space info for num bytes and reserve precious
|
||||
* corresponding qgroup space
|
||||
* (Done in check_data_free_space)
|
||||
*
|
||||
* - reserve space for metadata space, based on the number of outstanding
|
||||
* extents and how much csums will be needed
|
||||
* also reserve metadata space in a per root over-reserve method.
|
||||
* extents and how many csums will be needed. Also reserve metadata space in a
|
||||
* per root over-reserve method.
|
||||
* - add to the inodes->delalloc_bytes
|
||||
* - add it to the fs_info's delalloc inodes list.
|
||||
* (Above 3 all done in delalloc_reserve_metadata)
|
||||
*
|
||||
* Return 0 for success
|
||||
* Return <0 for error(-ENOSPC or -EQUOT)
|
||||
* Return <0 for error(-ENOSPC or -EDQUOT)
|
||||
*/
|
||||
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len)
|
||||
|
@ -473,7 +476,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Release data and metadata space for delalloc
|
||||
*
|
||||
* @inode: inode we're releasing space for
|
||||
|
@ -482,10 +485,10 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
|||
* @len: length of the space already reserved
|
||||
* @qgroup_free: should qgroup reserved-space also be freed
|
||||
*
|
||||
* This function will release the metadata space that was not used and will
|
||||
* decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
|
||||
* list if there are no delalloc bytes left.
|
||||
* Also it will handle the qgroup reserved space.
|
||||
* Release the metadata space that was not used, decrement
|
||||
* ->delalloc_bytes and remove it from the fs_info->delalloc_inodes list if
|
||||
* there are no delalloc bytes left. Also it will handle the qgroup reserved
|
||||
* space.
|
||||
*/
|
||||
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset *reserved,
|
||||
|
|
|
@ -20,5 +20,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
|||
bool qgroup_free);
|
||||
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
u64 disk_num_bytes, bool noflush);
|
||||
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
|
||||
|
||||
#endif /* BTRFS_DELALLOC_SPACE_H */
|
||||
|
|
|
@ -6,14 +6,19 @@
|
|||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/iversion.h>
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "misc.h"
|
||||
#include "delayed-inode.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "ctree.h"
|
||||
#include "qgroup.h"
|
||||
#include "locking.h"
|
||||
#include "inode-item.h"
|
||||
#include "space-info.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
|
||||
#define BTRFS_DELAYED_WRITEBACK 512
|
||||
#define BTRFS_DELAYED_BACKGROUND 128
|
||||
|
@ -1412,7 +1417,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
|
|||
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir,
|
||||
struct btrfs_disk_key *disk_key, u8 type,
|
||||
struct btrfs_disk_key *disk_key, u8 flags,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
@ -1443,7 +1448,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_stack_dir_transid(dir_item, trans->transid);
|
||||
btrfs_set_stack_dir_data_len(dir_item, 0);
|
||||
btrfs_set_stack_dir_name_len(dir_item, name_len);
|
||||
btrfs_set_stack_dir_type(dir_item, type);
|
||||
btrfs_set_stack_dir_flags(dir_item, flags);
|
||||
memcpy((char *)(dir_item + 1), name, name_len);
|
||||
|
||||
data_len = delayed_item->data_len + sizeof(struct btrfs_item);
|
||||
|
@ -1641,8 +1646,8 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
|||
* We can only do one readdir with delayed items at a time because of
|
||||
* item->readdir_list.
|
||||
*/
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(inode, 0);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(BTRFS_I(inode), 0);
|
||||
|
||||
mutex_lock(&delayed_node->mutex);
|
||||
item = __btrfs_first_delayed_insertion_item(delayed_node);
|
||||
|
@ -1753,7 +1758,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
|||
name = (char *)(di + 1);
|
||||
name_len = btrfs_stack_dir_name_len(di);
|
||||
|
||||
d_type = fs_ftype_to_dtype(di->type);
|
||||
d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type));
|
||||
btrfs_disk_key_to_cpu(&location, &di->location);
|
||||
|
||||
over = !dir_emit(ctx, name, name_len,
|
||||
|
|
|
@ -113,7 +113,7 @@ static inline void btrfs_init_delayed_root(
|
|||
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir,
|
||||
struct btrfs_disk_key *disk_key, u8 type,
|
||||
struct btrfs_disk_key *disk_key, u8 flags,
|
||||
u64 index);
|
||||
|
||||
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
|
|
|
@ -6,12 +6,14 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sort.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "transaction.h"
|
||||
#include "qgroup.h"
|
||||
#include "space-info.h"
|
||||
#include "tree-mod-log.h"
|
||||
#include "fs.h"
|
||||
|
||||
struct kmem_cache *btrfs_delayed_ref_head_cachep;
|
||||
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
|
||||
|
@ -69,14 +71,14 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
|
|||
return btrfs_check_space_for_delayed_refs(trans->fs_info);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a ref head's reservation
|
||||
/*
|
||||
* Release a ref head's reservation.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @nr: number of items to drop
|
||||
*
|
||||
* This drops the delayed ref head's count from the delayed refs rsv and frees
|
||||
* any excess reservation we had.
|
||||
* Drops the delayed ref head's count from the delayed refs rsv and frees any
|
||||
* excess reservation we had.
|
||||
*/
|
||||
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
|
||||
{
|
||||
|
@ -102,8 +104,7 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
|
|||
}
|
||||
|
||||
/*
|
||||
* btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
|
||||
* @trans - the trans that may have generated delayed refs
|
||||
* Adjust the size of the delayed refs rsv.
|
||||
*
|
||||
* This is to be called anytime we may have adjusted trans->delayed_ref_updates,
|
||||
* it'll calculate the additional size and add it to the delayed_refs_rsv.
|
||||
|
@ -137,8 +138,8 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
|
|||
trans->delayed_ref_updates = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transfer bytes to our delayed refs rsv
|
||||
/*
|
||||
* Transfer bytes to our delayed refs rsv.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @src: source block rsv to transfer from
|
||||
|
@ -186,8 +187,8 @@ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
|||
delayed_refs_rsv->space_info, to_free);
|
||||
}
|
||||
|
||||
/**
|
||||
* Refill based on our delayed refs usage
|
||||
/*
|
||||
* Refill based on our delayed refs usage.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @flush: control how we can flush for this reservation.
|
||||
|
|
|
@ -18,11 +18,13 @@
|
|||
#include "volumes.h"
|
||||
#include "async-thread.h"
|
||||
#include "check-integrity.h"
|
||||
#include "rcu-string.h"
|
||||
#include "dev-replace.h"
|
||||
#include "sysfs.h"
|
||||
#include "zoned.h"
|
||||
#include "block-group.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "scrub.h"
|
||||
|
||||
/*
|
||||
* Device replace overview
|
||||
|
@ -246,7 +248,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||
struct btrfs_device *device;
|
||||
struct block_device *bdev;
|
||||
struct rcu_string *name;
|
||||
u64 devid = BTRFS_DEV_REPLACE_DEVID;
|
||||
int ret = 0;
|
||||
|
||||
|
@ -290,19 +291,12 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
|
||||
|
||||
device = btrfs_alloc_device(NULL, &devid, NULL);
|
||||
device = btrfs_alloc_device(NULL, &devid, NULL, device_path);
|
||||
if (IS_ERR(device)) {
|
||||
ret = PTR_ERR(device);
|
||||
goto error;
|
||||
}
|
||||
|
||||
name = rcu_string_strdup(device_path, GFP_KERNEL);
|
||||
if (!name) {
|
||||
btrfs_free_device(device);
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
rcu_assign_pointer(device->name, name);
|
||||
ret = lookup_bdev(device_path, &device->devt);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
@ -456,14 +450,6 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static char* btrfs_dev_name(struct btrfs_device *device)
|
||||
{
|
||||
if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
|
||||
return "<missing disk>";
|
||||
else
|
||||
return rcu_str_deref(device->name);
|
||||
}
|
||||
|
||||
static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *src_dev)
|
||||
{
|
||||
|
@ -679,7 +665,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
|||
"dev_replace from %s (devid %llu) to %s started",
|
||||
btrfs_dev_name(src_device),
|
||||
src_device->devid,
|
||||
rcu_str_deref(tgt_device->name));
|
||||
btrfs_dev_name(tgt_device));
|
||||
|
||||
/*
|
||||
* from now on, the writes to the srcdev are all duplicated to
|
||||
|
@ -938,7 +924,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
|||
"btrfs_scrub_dev(%s, %llu, %s) failed %d",
|
||||
btrfs_dev_name(src_device),
|
||||
src_device->devid,
|
||||
rcu_str_deref(tgt_device->name), scrub_ret);
|
||||
btrfs_dev_name(tgt_device), scrub_ret);
|
||||
error:
|
||||
up_write(&dev_replace->rwsem);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
|
@ -956,7 +942,7 @@ error:
|
|||
"dev_replace from %s (devid %llu) to %s finished",
|
||||
btrfs_dev_name(src_device),
|
||||
src_device->devid,
|
||||
rcu_str_deref(tgt_device->name));
|
||||
btrfs_dev_name(tgt_device));
|
||||
clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state);
|
||||
tgt_device->devid = src_device->devid;
|
||||
src_device->devid = BTRFS_DEV_REPLACE_DEVID;
|
||||
|
|
|
@ -25,5 +25,13 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
|
|||
bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
struct btrfs_block_group *cache,
|
||||
u64 physical);
|
||||
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
|
||||
|
||||
static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_bio_counter_sub(fs_info, 1);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3,9 +3,12 @@
|
|||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "accessors.h"
|
||||
#include "dir-item.h"
|
||||
|
||||
/*
|
||||
* insert a name into a directory, doing overflow properly if there is a hash
|
||||
|
@ -81,7 +84,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
|||
leaf = path->nodes[0];
|
||||
btrfs_cpu_key_to_disk(&disk_key, &location);
|
||||
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
|
||||
btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
|
||||
btrfs_set_dir_flags(leaf, dir_item, BTRFS_FT_XATTR);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name_len);
|
||||
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
|
||||
btrfs_set_dir_data_len(leaf, dir_item, data_len);
|
||||
|
@ -103,8 +106,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
|||
* to use for the second index (if one is created).
|
||||
* Will return 0 or -ENOMEM
|
||||
*/
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
|
||||
int name_len, struct btrfs_inode *dir,
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
|
||||
const struct fscrypt_str *name, struct btrfs_inode *dir,
|
||||
struct btrfs_key *location, u8 type, u64 index)
|
||||
{
|
||||
int ret = 0;
|
||||
|
@ -120,7 +123,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
|
|||
|
||||
key.objectid = btrfs_ino(dir);
|
||||
key.type = BTRFS_DIR_ITEM_KEY;
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
key.offset = btrfs_name_hash(name->name, name->len);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
|
@ -128,9 +131,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
|
|||
|
||||
btrfs_cpu_key_to_disk(&disk_key, location);
|
||||
|
||||
data_size = sizeof(*dir_item) + name_len;
|
||||
data_size = sizeof(*dir_item) + name->len;
|
||||
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
|
||||
name, name_len);
|
||||
name->name, name->len);
|
||||
if (IS_ERR(dir_item)) {
|
||||
ret = PTR_ERR(dir_item);
|
||||
if (ret == -EEXIST)
|
||||
|
@ -138,15 +141,18 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
|
|||
goto out_free;
|
||||
}
|
||||
|
||||
if (IS_ENCRYPTED(&dir->vfs_inode))
|
||||
type |= BTRFS_FT_ENCRYPTED;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
|
||||
btrfs_set_dir_type(leaf, dir_item, type);
|
||||
btrfs_set_dir_flags(leaf, dir_item, type);
|
||||
btrfs_set_dir_data_len(leaf, dir_item, 0);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name_len);
|
||||
btrfs_set_dir_name_len(leaf, dir_item, name->len);
|
||||
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
|
||||
name_ptr = (unsigned long)(dir_item + 1);
|
||||
|
||||
write_extent_buffer(leaf, name, name_ptr, name_len);
|
||||
write_extent_buffer(leaf, name->name, name_ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
second_insert:
|
||||
|
@ -157,7 +163,7 @@ second_insert:
|
|||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
|
||||
ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir,
|
||||
&disk_key, type, index);
|
||||
out_free:
|
||||
btrfs_free_path(path);
|
||||
|
@ -206,7 +212,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir(
|
|||
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
const char *name, int name_len,
|
||||
const struct fscrypt_str *name,
|
||||
int mod)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
|
@ -214,9 +220,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
|||
|
||||
key.objectid = dir;
|
||||
key.type = BTRFS_DIR_ITEM_KEY;
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
key.offset = btrfs_name_hash(name->name, name->len);
|
||||
|
||||
di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
|
||||
di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
|
||||
name->len, mod);
|
||||
if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
|
||||
return NULL;
|
||||
|
||||
|
@ -224,7 +231,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
||||
const char *name, int name_len)
|
||||
const struct fscrypt_str *name)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
|
@ -240,9 +247,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
|||
|
||||
key.objectid = dir;
|
||||
key.type = BTRFS_DIR_ITEM_KEY;
|
||||
key.offset = btrfs_name_hash(name, name_len);
|
||||
key.offset = btrfs_name_hash(name->name, name->len);
|
||||
|
||||
di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
|
||||
di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name,
|
||||
name->len, 0);
|
||||
if (IS_ERR(di)) {
|
||||
ret = PTR_ERR(di);
|
||||
/* Nothing found, we're safe */
|
||||
|
@ -262,11 +270,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
|||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* see if there is room in the item to insert this
|
||||
* name
|
||||
*/
|
||||
data_size = sizeof(*di) + name_len;
|
||||
/* See if there is room in the item to insert this name. */
|
||||
data_size = sizeof(*di) + name->len;
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (data_size + btrfs_item_size(leaf, slot) +
|
||||
|
@ -303,8 +308,7 @@ struct btrfs_dir_item *
|
|||
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
u64 index, const char *name, int name_len,
|
||||
int mod)
|
||||
u64 index, const struct fscrypt_str *name, int mod)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key key;
|
||||
|
@ -313,7 +317,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
|||
key.type = BTRFS_DIR_INDEX_KEY;
|
||||
key.offset = index;
|
||||
|
||||
di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
|
||||
di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
|
||||
name->len, mod);
|
||||
if (di == ERR_PTR(-ENOENT))
|
||||
return NULL;
|
||||
|
||||
|
@ -321,9 +326,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
struct btrfs_dir_item *
|
||||
btrfs_search_dir_index_item(struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dirid,
|
||||
const char *name, int name_len)
|
||||
btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
|
||||
u64 dirid, const struct fscrypt_str *name)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key key;
|
||||
|
@ -338,7 +342,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root,
|
|||
break;
|
||||
|
||||
di = btrfs_match_dir_item_name(root->fs_info, path,
|
||||
name, name_len);
|
||||
name->name, name->len);
|
||||
if (di)
|
||||
return di;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_DIR_ITEM_H
|
||||
#define BTRFS_DIR_ITEM_H
|
||||
|
||||
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
|
||||
const struct fscrypt_str *name, struct btrfs_inode *dir,
|
||||
struct btrfs_key *location, u8 type, u64 index);
|
||||
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
const struct fscrypt_str *name, int mod);
|
||||
struct btrfs_dir_item *btrfs_lookup_dir_index_item(
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
u64 index, const struct fscrypt_str *name, int mod);
|
||||
struct btrfs_dir_item *btrfs_search_dir_index_item(struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dirid,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_dir_item *di);
|
||||
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid,
|
||||
const char *name, u16 name_len,
|
||||
const void *data, u16 data_len);
|
||||
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
const char *name, u16 name_len,
|
||||
int mod);
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
const char *name,
|
||||
int name_len);
|
||||
|
||||
#endif
|
|
@ -11,6 +11,7 @@
|
|||
#include "block-group.h"
|
||||
#include "discard.h"
|
||||
#include "free-space-cache.h"
|
||||
#include "fs.h"
|
||||
|
||||
/*
|
||||
* This contains the logic to handle async discard.
|
||||
|
@ -61,7 +62,7 @@
|
|||
#define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL)
|
||||
#define BTRFS_DISCARD_MAX_IOPS (10U)
|
||||
|
||||
/* Montonically decreasing minimum length filters after index 0 */
|
||||
/* Monotonically decreasing minimum length filters after index 0 */
|
||||
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
|
||||
0,
|
||||
BTRFS_ASYNC_DISCARD_MAX_FILTER,
|
||||
|
@ -146,8 +147,9 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
|||
return running;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_block_group - find block_group that's up next for discarding
|
||||
/*
|
||||
* Find block_group that's up next for discarding.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @now: current time
|
||||
*
|
||||
|
@ -184,17 +186,17 @@ static struct btrfs_block_group *find_next_block_group(
|
|||
return ret_block_group;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap find_next_block_group()
|
||||
/*
|
||||
* Look up next block group and set it for use.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @discard_state: the discard_state of the block_group after state management
|
||||
* @discard_index: the discard_index of the block_group after state management
|
||||
* @now: time when discard was invoked, in ns
|
||||
*
|
||||
* This wraps find_next_block_group() and sets the block_group to be in use.
|
||||
* discard_state's control flow is managed here. Variables related to
|
||||
* discard_state are reset here as needed (eg discard_cursor). @discard_state
|
||||
* Wrap find_next_block_group() and set the block_group to be in use.
|
||||
* @discard_state's control flow is managed here. Variables related to
|
||||
* @discard_state are reset here as needed (eg. @discard_cursor). @discard_state
|
||||
* and @discard_index are remembered as they may change while we're discarding,
|
||||
* but we want the discard to execute in the context determined here.
|
||||
*/
|
||||
|
@ -233,8 +235,9 @@ again:
|
|||
return block_group;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_check_filter - updates a block groups filters
|
||||
/*
|
||||
* Update a block group's filters.
|
||||
*
|
||||
* @block_group: block group of interest
|
||||
* @bytes: recently freed region size after coalescing
|
||||
*
|
||||
|
@ -271,8 +274,9 @@ void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_update_discard_index - moves a block group along the discard lists
|
||||
/*
|
||||
* Move a block group along the discard lists.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
|
@ -291,13 +295,14 @@ static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
|
|||
add_to_discard_list(discard_ctl, block_group);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_cancel_work - remove a block_group from the discard lists
|
||||
/*
|
||||
* Remove a block_group from the discard lists.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This removes @block_group from the discard lists. If necessary, it waits on
|
||||
* the current work and then reschedules the delayed work.
|
||||
* Remove @block_group from the discard lists. If necessary, wait on the
|
||||
* current work and then reschedule the delayed work.
|
||||
*/
|
||||
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
|
@ -308,12 +313,13 @@ void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_queue_work - handles queuing the block_groups
|
||||
/*
|
||||
* Handles queuing the block_groups.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This maintains the LRU order of the discard lists.
|
||||
* Maintain the LRU order of the discard lists.
|
||||
*/
|
||||
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
|
@ -383,7 +389,8 @@ static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
|
|||
}
|
||||
|
||||
/*
|
||||
* btrfs_discard_schedule_work - responsible for scheduling the discard work
|
||||
* Responsible for scheduling the discard work.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @override: override the current timer
|
||||
*
|
||||
|
@ -401,15 +408,16 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
|
|||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_finish_discard_pass - determine next step of a block_group
|
||||
/*
|
||||
* Determine next step of a block_group.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This determines the next step for a block group after it's finished going
|
||||
* through a pass on a discard list. If it is unused and fully trimmed, we can
|
||||
* mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
|
||||
* the appropriate filter list or let it fall off.
|
||||
* Determine the next step for a block group after it's finished going through
|
||||
* a pass on a discard list. If it is unused and fully trimmed, we can mark it
|
||||
* unused and send it to the unused_bgs path. Otherwise, pass it onto the
|
||||
* appropriate filter list or let it fall off.
|
||||
*/
|
||||
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
|
@ -426,12 +434,13 @@ static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_workfn - discard work function
|
||||
/*
|
||||
* Discard work queue callback
|
||||
*
|
||||
* @work: work
|
||||
*
|
||||
* This finds the next block_group to start discarding and then discards a
|
||||
* single region. It does this in a two-pass fashion: first extents and second
|
||||
* Find the next block_group to start discarding and then discard a single
|
||||
* region. It does this in a two-pass fashion: first extents and second
|
||||
* bitmaps. Completely discarded block groups are sent to the unused_bgs path.
|
||||
*/
|
||||
static void btrfs_discard_workfn(struct work_struct *work)
|
||||
|
@ -507,11 +516,12 @@ static void btrfs_discard_workfn(struct work_struct *work)
|
|||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_run_discard_work - determines if async discard should be running
|
||||
/*
|
||||
* Determine if async discard should be running.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
|
||||
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
|
||||
*/
|
||||
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
|
||||
{
|
||||
|
@ -523,8 +533,9 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
|
|||
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_calc_delay - recalculate the base delay
|
||||
/*
|
||||
* Recalculate the base delay.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* Recalculate the base delay which is based off the total number of
|
||||
|
@ -545,7 +556,7 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
|
|||
spin_lock(&discard_ctl->lock);
|
||||
|
||||
/*
|
||||
* The following is to fix a potential -1 discrepenancy that we're not
|
||||
* The following is to fix a potential -1 discrepancy that we're not
|
||||
* sure how to reproduce. But given that this is the only place that
|
||||
* utilizes these numbers and this is only called from
|
||||
* btrfs_finish_extent_commit() which is synchronized, we can correct
|
||||
|
@ -578,13 +589,14 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
|
|||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_update_discardable - propagate discard counters
|
||||
/*
|
||||
* Propagate discard counters.
|
||||
*
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This propagates deltas of counters up to the discard_ctl. It maintains a
|
||||
* current counter and a previous counter passing the delta up to the global
|
||||
* stat. Then the current counter value becomes the previous counter value.
|
||||
* Propagate deltas of counters up to the discard_ctl. It maintains a current
|
||||
* counter and a previous counter passing the delta up to the global stat.
|
||||
* Then the current counter value becomes the previous counter value.
|
||||
*/
|
||||
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
|
||||
{
|
||||
|
@ -619,8 +631,9 @@ void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
|
||||
/*
|
||||
* Punt unused_bgs list to discard lists.
|
||||
*
|
||||
* @fs_info: fs_info of interest
|
||||
*
|
||||
* The unused_bgs list needs to be punted to the discard lists because the
|
||||
|
@ -644,8 +657,9 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
|
|||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_purge_list - purge discard lists
|
||||
/*
|
||||
* Purge discard lists.
|
||||
*
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* If we are disabling async discard, we may have intercepted block groups that
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "volumes.h"
|
||||
#include "bio.h"
|
||||
#include "print-tree.h"
|
||||
#include "locking.h"
|
||||
#include "tree-log.h"
|
||||
|
@ -43,6 +43,15 @@
|
|||
#include "space-info.h"
|
||||
#include "zoned.h"
|
||||
#include "subpage.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
#include "defrag.h"
|
||||
#include "uuid-tree.h"
|
||||
#include "relocation.h"
|
||||
#include "scrub.h"
|
||||
#include "super.h"
|
||||
|
||||
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
|
||||
BTRFS_HEADER_FLAG_RELOC |\
|
||||
|
@ -75,12 +84,12 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
|
|||
* just before they are sent down the IO stack.
|
||||
*/
|
||||
struct async_submit_bio {
|
||||
struct inode *inode;
|
||||
struct btrfs_inode *inode;
|
||||
struct bio *bio;
|
||||
extent_submit_bio_start_t *submit_bio_start;
|
||||
enum btrfs_wq_submit_cmd submit_cmd;
|
||||
int mirror_num;
|
||||
|
||||
/* Optional parameter for submit_bio_start used by direct io */
|
||||
/* Optional parameter used by direct io */
|
||||
u64 dio_file_offset;
|
||||
struct btrfs_work work;
|
||||
blk_status_t status;
|
||||
|
@ -246,40 +255,54 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
|
||||
int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
u64 start = eb->start;
|
||||
int i, num_pages = num_extent_pages(eb);
|
||||
int ret = 0;
|
||||
|
||||
if (sb_rdonly(fs_info->sb))
|
||||
return -EROFS;
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
struct page *p = eb->pages[i];
|
||||
|
||||
ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE,
|
||||
start, p, start - page_offset(p), mirror_num);
|
||||
if (ret)
|
||||
break;
|
||||
start += PAGE_SIZE;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to read a given tree block, doing retries as required when
|
||||
* the checksums don't match and we have alternate mirrors to try.
|
||||
*
|
||||
* @parent_transid: expected transid, skip check if 0
|
||||
* @level: expected level, mandatory check
|
||||
* @first_key: expected key of first slot, skip check if NULL
|
||||
* @check: expected tree parentness check, see the comments of the
|
||||
* structure for details.
|
||||
*/
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *eb,
|
||||
u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key)
|
||||
struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
struct extent_io_tree *io_tree;
|
||||
int failed = 0;
|
||||
int ret;
|
||||
int num_copies = 0;
|
||||
int mirror_num = 0;
|
||||
int failed_mirror = 0;
|
||||
|
||||
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
|
||||
ASSERT(check);
|
||||
|
||||
while (1) {
|
||||
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
|
||||
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
|
||||
if (!ret) {
|
||||
if (verify_parent_transid(io_tree, eb,
|
||||
parent_transid, 0))
|
||||
ret = -EIO;
|
||||
else if (btrfs_verify_level_key(eb, level,
|
||||
first_key, parent_transid))
|
||||
ret = -EUCLEAN;
|
||||
else
|
||||
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
|
||||
if (!ret)
|
||||
break;
|
||||
}
|
||||
|
||||
num_copies = btrfs_num_copies(fs_info,
|
||||
eb->start, eb->len);
|
||||
|
@ -455,7 +478,8 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
|
|||
}
|
||||
|
||||
/* Do basic extent buffer checks at read time */
|
||||
static int validate_extent_buffer(struct extent_buffer *eb)
|
||||
static int validate_extent_buffer(struct extent_buffer *eb,
|
||||
struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
u64 found_start;
|
||||
|
@ -465,6 +489,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
|
|||
const u8 *header_csum;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(check);
|
||||
|
||||
found_start = btrfs_header_bytenr(eb);
|
||||
if (found_start != eb->start) {
|
||||
btrfs_err_rl(fs_info,
|
||||
|
@ -503,6 +529,45 @@ static int validate_extent_buffer(struct extent_buffer *eb)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (found_level != check->level) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (unlikely(check->transid &&
|
||||
btrfs_header_generation(eb) != check->transid)) {
|
||||
btrfs_err_rl(eb->fs_info,
|
||||
"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
|
||||
eb->start, eb->read_mirror, check->transid,
|
||||
btrfs_header_generation(eb));
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (check->has_first_key) {
|
||||
struct btrfs_key *expect_key = &check->first_key;
|
||||
struct btrfs_key found_key;
|
||||
|
||||
if (found_level)
|
||||
btrfs_node_key_to_cpu(eb, &found_key, 0);
|
||||
else
|
||||
btrfs_item_key_to_cpu(eb, &found_key, 0);
|
||||
if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) {
|
||||
btrfs_err(fs_info,
|
||||
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
|
||||
eb->start, check->transid,
|
||||
expect_key->objectid,
|
||||
expect_key->type, expect_key->offset,
|
||||
found_key.objectid, found_key.type,
|
||||
found_key.offset);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (check->owner_root) {
|
||||
ret = btrfs_check_eb_owner(eb, check->owner_root);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a leaf block and it is corrupt, set the corrupt bit so
|
||||
* that we don't try and read the other copies of this block, just
|
||||
|
@ -527,13 +592,15 @@ out:
|
|||
}
|
||||
|
||||
static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
|
||||
int mirror)
|
||||
int mirror, struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
|
||||
struct extent_buffer *eb;
|
||||
bool reads_done;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(check);
|
||||
|
||||
/*
|
||||
* We don't allow bio merge for subpage metadata read, so we should
|
||||
* only get one eb for each endio hook.
|
||||
|
@ -557,7 +624,7 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
|
|||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
ret = validate_extent_buffer(eb);
|
||||
ret = validate_extent_buffer(eb, check);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
|
@ -587,7 +654,8 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
|||
ASSERT(page->private);
|
||||
|
||||
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
|
||||
return validate_subpage_buffer(page, start, end, mirror);
|
||||
return validate_subpage_buffer(page, start, end, mirror,
|
||||
&bbio->parent_check);
|
||||
|
||||
eb = (struct extent_buffer *)page->private;
|
||||
|
||||
|
@ -606,7 +674,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
|||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
ret = validate_extent_buffer(eb);
|
||||
ret = validate_extent_buffer(eb, &bbio->parent_check);
|
||||
err:
|
||||
if (ret) {
|
||||
/*
|
||||
|
@ -628,8 +696,18 @@ static void run_one_async_start(struct btrfs_work *work)
|
|||
blk_status_t ret;
|
||||
|
||||
async = container_of(work, struct async_submit_bio, work);
|
||||
ret = async->submit_bio_start(async->inode, async->bio,
|
||||
async->dio_file_offset);
|
||||
switch (async->submit_cmd) {
|
||||
case WQ_SUBMIT_METADATA:
|
||||
ret = btree_submit_bio_start(async->bio);
|
||||
break;
|
||||
case WQ_SUBMIT_DATA:
|
||||
ret = btrfs_submit_bio_start(async->inode, async->bio);
|
||||
break;
|
||||
case WQ_SUBMIT_DATA_DIO:
|
||||
ret = btrfs_submit_bio_start_direct_io(async->inode,
|
||||
async->bio, async->dio_file_offset);
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
async->status = ret;
|
||||
}
|
||||
|
@ -646,7 +724,7 @@ static void run_one_async_done(struct btrfs_work *work)
|
|||
{
|
||||
struct async_submit_bio *async =
|
||||
container_of(work, struct async_submit_bio, work);
|
||||
struct inode *inode = async->inode;
|
||||
struct btrfs_inode *inode = async->inode;
|
||||
struct btrfs_bio *bbio = btrfs_bio(async->bio);
|
||||
|
||||
/* If an error occurred we just want to clean up the bio and move on */
|
||||
|
@ -661,7 +739,7 @@ static void run_one_async_done(struct btrfs_work *work)
|
|||
* This changes nothing when cgroups aren't in use.
|
||||
*/
|
||||
async->bio->bi_opf |= REQ_CGROUP_PUNT;
|
||||
btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
|
||||
btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num);
|
||||
}
|
||||
|
||||
static void run_one_async_free(struct btrfs_work *work)
|
||||
|
@ -679,11 +757,10 @@ static void run_one_async_free(struct btrfs_work *work)
|
|||
* - true if the work has been successfully submitted
|
||||
* - false in case of error
|
||||
*/
|
||||
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start)
|
||||
bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct async_submit_bio *async;
|
||||
|
||||
async = kmalloc(sizeof(*async), GFP_NOFS);
|
||||
|
@ -693,7 +770,7 @@ bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
|
|||
async->inode = inode;
|
||||
async->bio = bio;
|
||||
async->mirror_num = mirror_num;
|
||||
async->submit_bio_start = submit_bio_start;
|
||||
async->submit_cmd = cmd;
|
||||
|
||||
btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
|
||||
run_one_async_free);
|
||||
|
@ -727,8 +804,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
|
|||
return errno_to_blk_status(ret);
|
||||
}
|
||||
|
||||
static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio,
|
||||
u64 dio_file_offset)
|
||||
blk_status_t btree_submit_bio_start(struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* when we're called for a write, we're already in the async
|
||||
|
@ -749,13 +825,14 @@ static bool should_async_write(struct btrfs_fs_info *fs_info,
|
|||
return true;
|
||||
}
|
||||
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num)
|
||||
void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t ret;
|
||||
|
||||
bio->bi_opf |= REQ_META;
|
||||
bbio->is_metadata = 1;
|
||||
|
||||
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
|
||||
btrfs_submit_bio(fs_info, bio, mirror_num);
|
||||
|
@ -766,8 +843,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
|
|||
* Kthread helpers are used to submit writes so that checksumming can
|
||||
* happen in parallel across all CPUs.
|
||||
*/
|
||||
if (should_async_write(fs_info, BTRFS_I(inode)) &&
|
||||
btrfs_wq_submit_bio(inode, bio, mirror_num, 0, btree_submit_bio_start))
|
||||
if (should_async_write(fs_info, inode) &&
|
||||
btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA))
|
||||
return;
|
||||
|
||||
ret = btree_csum_one_bio(bio);
|
||||
|
@ -919,28 +996,28 @@ struct extent_buffer *btrfs_find_create_tree_block(
|
|||
* Read tree block at logical address @bytenr and do various basic but critical
|
||||
* verification.
|
||||
*
|
||||
* @owner_root: the objectid of the root owner for this block.
|
||||
* @parent_transid: expected transid of this tree block, skip check if 0
|
||||
* @level: expected level, mandatory check
|
||||
* @first_key: expected key in slot 0, skip check if NULL
|
||||
* @check: expected tree parentness check, see comments of the
|
||||
* structure for details.
|
||||
*/
|
||||
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 owner_root, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key)
|
||||
struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct extent_buffer *buf = NULL;
|
||||
int ret;
|
||||
|
||||
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
|
||||
ASSERT(check);
|
||||
|
||||
buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root,
|
||||
check->level);
|
||||
if (IS_ERR(buf))
|
||||
return buf;
|
||||
|
||||
ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key);
|
||||
ret = btrfs_read_extent_buffer(buf, check);
|
||||
if (ret) {
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
if (btrfs_check_eb_owner(buf, owner_root)) {
|
||||
if (btrfs_check_eb_owner(buf, check->owner_root)) {
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
|
@ -1027,9 +1104,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
|||
root->anon_dev = 0;
|
||||
if (!dummy) {
|
||||
extent_io_tree_init(fs_info, &root->dirty_log_pages,
|
||||
IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
|
||||
IO_TREE_ROOT_DIRTY_LOG_PAGES);
|
||||
extent_io_tree_init(fs_info, &root->log_csum_range,
|
||||
IO_TREE_LOG_CSUM_RANGE, NULL);
|
||||
IO_TREE_LOG_CSUM_RANGE);
|
||||
}
|
||||
|
||||
spin_lock_init(&root->root_item_lock);
|
||||
|
@ -1167,6 +1244,13 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
|
|||
return btrfs_global_root(fs_info, &key);
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
|
||||
return fs_info->block_group_root;
|
||||
return btrfs_extent_root(fs_info, 0);
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
u64 objectid)
|
||||
{
|
||||
|
@ -1197,7 +1281,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
|||
if (IS_ERR(leaf)) {
|
||||
ret = PTR_ERR(leaf);
|
||||
leaf = NULL;
|
||||
goto fail_unlock;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
root->node = leaf;
|
||||
|
@ -1232,9 +1316,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
|||
|
||||
return root;
|
||||
|
||||
fail_unlock:
|
||||
if (leaf)
|
||||
btrfs_tree_unlock(leaf);
|
||||
fail:
|
||||
btrfs_put_root(root);
|
||||
|
||||
|
@ -1352,6 +1433,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
|
|||
struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct btrfs_fs_info *fs_info = tree_root->fs_info;
|
||||
u64 generation;
|
||||
int ret;
|
||||
|
@ -1371,9 +1453,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
|
|||
|
||||
generation = btrfs_root_generation(&root->root_item);
|
||||
level = btrfs_root_level(&root->root_item);
|
||||
root->node = read_tree_block(fs_info,
|
||||
btrfs_root_bytenr(&root->root_item),
|
||||
key->objectid, generation, level, NULL);
|
||||
check.level = level;
|
||||
check.transid = generation;
|
||||
check.owner_root = key->objectid;
|
||||
root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item),
|
||||
&check);
|
||||
if (IS_ERR(root->node)) {
|
||||
ret = PTR_ERR(root->node);
|
||||
root->node = NULL;
|
||||
|
@ -2084,8 +2168,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
|||
btrfs_destroy_workqueue(fs_info->workers);
|
||||
if (fs_info->endio_workers)
|
||||
destroy_workqueue(fs_info->endio_workers);
|
||||
if (fs_info->endio_raid56_workers)
|
||||
destroy_workqueue(fs_info->endio_raid56_workers);
|
||||
if (fs_info->rmw_workers)
|
||||
destroy_workqueue(fs_info->rmw_workers);
|
||||
if (fs_info->compressed_write_workers)
|
||||
|
@ -2231,7 +2313,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
|||
|
||||
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
|
||||
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
|
||||
IO_TREE_BTREE_INODE_IO, NULL);
|
||||
IO_TREE_BTREE_INODE_IO);
|
||||
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
|
||||
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
|
||||
|
@ -2291,8 +2373,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
|||
alloc_workqueue("btrfs-endio", flags, max_active);
|
||||
fs_info->endio_meta_workers =
|
||||
alloc_workqueue("btrfs-endio-meta", flags, max_active);
|
||||
fs_info->endio_raid56_workers =
|
||||
alloc_workqueue("btrfs-endio-raid56", flags, max_active);
|
||||
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
|
||||
fs_info->endio_write_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
|
||||
|
@ -2314,7 +2394,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
|||
fs_info->delalloc_workers && fs_info->flush_workers &&
|
||||
fs_info->endio_workers && fs_info->endio_meta_workers &&
|
||||
fs_info->compressed_write_workers &&
|
||||
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
|
||||
fs_info->endio_write_workers &&
|
||||
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
|
||||
fs_info->caching_workers && fs_info->fixup_workers &&
|
||||
fs_info->delayed_workers && fs_info->qgroup_rescan_workers &&
|
||||
|
@ -2350,6 +2430,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_fs_devices *fs_devices)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct btrfs_root *log_tree_root;
|
||||
struct btrfs_super_block *disk_super = fs_info->super_copy;
|
||||
u64 bytenr = btrfs_super_log_root(disk_super);
|
||||
|
@ -2365,10 +2446,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
|||
if (!log_tree_root)
|
||||
return -ENOMEM;
|
||||
|
||||
log_tree_root->node = read_tree_block(fs_info, bytenr,
|
||||
BTRFS_TREE_LOG_OBJECTID,
|
||||
fs_info->generation + 1, level,
|
||||
NULL);
|
||||
check.level = level;
|
||||
check.transid = fs_info->generation + 1;
|
||||
check.owner_root = BTRFS_TREE_LOG_OBJECTID;
|
||||
log_tree_root->node = read_tree_block(fs_info, bytenr, &check);
|
||||
if (IS_ERR(log_tree_root->node)) {
|
||||
btrfs_warn(fs_info, "failed to read log tree");
|
||||
ret = PTR_ERR(log_tree_root->node);
|
||||
|
@ -2846,10 +2927,14 @@ out:
|
|||
|
||||
static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
|
||||
{
|
||||
struct btrfs_tree_parent_check check = {
|
||||
.level = level,
|
||||
.transid = gen,
|
||||
.owner_root = root->root_key.objectid
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
root->node = read_tree_block(root->fs_info, bytenr,
|
||||
root->root_key.objectid, gen, level, NULL);
|
||||
root->node = read_tree_block(root->fs_info, bytenr, &check);
|
||||
if (IS_ERR(root->node)) {
|
||||
ret = PTR_ERR(root->node);
|
||||
root->node = NULL;
|
||||
|
@ -3057,7 +3142,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
|||
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
|
||||
|
||||
extent_io_tree_init(fs_info, &fs_info->excluded_extents,
|
||||
IO_TREE_FS_EXCLUDED_EXTENTS, NULL);
|
||||
IO_TREE_FS_EXCLUDED_EXTENTS);
|
||||
|
||||
mutex_init(&fs_info->ordered_operations_mutex);
|
||||
mutex_init(&fs_info->tree_log_mutex);
|
||||
|
@ -3743,10 +3828,18 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
|||
}
|
||||
|
||||
/*
|
||||
* Mount does not set all options immediately, we can do it now and do
|
||||
* not have to wait for transaction commit
|
||||
* For devices supporting discard turn on discard=async automatically,
|
||||
* unless it's already set or disabled. This could be turned off by
|
||||
* nodiscard for the same mount.
|
||||
*/
|
||||
btrfs_apply_pending_changes(fs_info);
|
||||
if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
|
||||
btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
|
||||
btrfs_test_opt(fs_info, NODISCARD)) &&
|
||||
fs_info->fs_devices->discardable) {
|
||||
btrfs_set_and_info(fs_info, DISCARD_ASYNC,
|
||||
"auto enabling async discard");
|
||||
btrfs_clear_opt(fs_info->mount_opt, NODISCARD);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) {
|
||||
|
@ -3875,7 +3968,7 @@ static void btrfs_end_super_write(struct bio *bio)
|
|||
if (bio->bi_status) {
|
||||
btrfs_warn_rl_in_rcu(device->fs_info,
|
||||
"lost page write due to IO error on %s (%d)",
|
||||
rcu_str_deref(device->name),
|
||||
btrfs_dev_name(device),
|
||||
blk_status_to_errno(bio->bi_status));
|
||||
ClearPageUptodate(page);
|
||||
SetPageError(page);
|
||||
|
|
|
@ -27,14 +27,14 @@ static inline u64 btrfs_sb_offset(int mirror)
|
|||
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
struct btrfs_tree_parent_check;
|
||||
|
||||
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
||||
struct btrfs_key *first_key, u64 parent_transid);
|
||||
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 owner_root, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key);
|
||||
struct btrfs_tree_parent_check *check);
|
||||
struct extent_buffer *btrfs_find_create_tree_block(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 owner_root,
|
||||
|
@ -75,6 +75,7 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_key *key);
|
||||
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
|
||||
|
||||
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
|
@ -85,7 +86,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|||
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
||||
struct page *page, u64 start, u64 end,
|
||||
int mirror);
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num);
|
||||
void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
#endif
|
||||
|
@ -106,24 +107,22 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
|
||||
return fs_info->block_group_root;
|
||||
return btrfs_extent_root(fs_info, 0);
|
||||
}
|
||||
|
||||
void btrfs_put_root(struct btrfs_root *root);
|
||||
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
||||
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key);
|
||||
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start);
|
||||
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
|
||||
int mirror_num);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf,
|
||||
struct btrfs_tree_parent_check *check);
|
||||
|
||||
enum btrfs_wq_submit_cmd {
|
||||
WQ_SUBMIT_METADATA,
|
||||
WQ_SUBMIT_DATA,
|
||||
WQ_SUBMIT_DATA_DIO,
|
||||
};
|
||||
|
||||
bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
|
||||
u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd);
|
||||
blk_status_t btree_submit_bio_start(struct bio *bio);
|
||||
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
|
||||
|
@ -136,8 +135,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
|
|||
struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
u64 objectid);
|
||||
int btree_lock_page_hook(struct page *page, void *data,
|
||||
void (*flush_fn)(void *));
|
||||
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
||||
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
|
||||
int btrfs_init_root_free_objectid(struct btrfs_root *root);
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#include "btrfs_inode.h"
|
||||
#include "print-tree.h"
|
||||
#include "export.h"
|
||||
#include "accessors.h"
|
||||
#include "super.h"
|
||||
|
||||
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
|
||||
parent_objectid) / 4)
|
||||
|
@ -57,9 +59,20 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
|
|||
return type;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read dentry of inode with @objectid from filesystem root @root_objectid.
|
||||
*
|
||||
* @sb: the filesystem super block
|
||||
* @objectid: inode objectid
|
||||
* @root_objectid: object id of the subvolume root where to look up the inode
|
||||
* @generation: optional, if not zero, verify that the found inode
|
||||
* generation matches
|
||||
*
|
||||
* Return dentry alias for the inode, otherwise an error. In case the
|
||||
* generation does not match return ESTALE.
|
||||
*/
|
||||
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
u64 root_objectid, u64 generation,
|
||||
int check_generation)
|
||||
u64 root_objectid, u64 generation)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_root *root;
|
||||
|
@ -77,7 +90,7 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
|||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
|
||||
if (check_generation && generation != inode->i_generation) {
|
||||
if (generation != 0 && generation != inode->i_generation) {
|
||||
iput(inode);
|
||||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
|
@ -106,7 +119,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
|
|||
objectid = fid->parent_objectid;
|
||||
generation = fid->parent_gen;
|
||||
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||
|
@ -128,7 +141,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
|||
root_objectid = fid->root_objectid;
|
||||
generation = fid->gen;
|
||||
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
|
||||
}
|
||||
|
||||
struct dentry *btrfs_get_parent(struct dentry *child)
|
||||
|
@ -188,7 +201,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
|
|||
|
||||
if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
|
||||
return btrfs_get_dentry(fs_info->sb, key.objectid,
|
||||
found_key.offset, 0, 0);
|
||||
found_key.offset, 0);
|
||||
}
|
||||
|
||||
return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root));
|
||||
|
|
|
@ -19,8 +19,7 @@ struct btrfs_fid {
|
|||
} __attribute__ ((packed));
|
||||
|
||||
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
u64 root_objectid, u64 generation,
|
||||
int check_generation);
|
||||
u64 root_objectid, u64 generation);
|
||||
struct dentry *btrfs_get_parent(struct dentry *child);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <linux/slab.h>
|
||||
#include <trace/events/btrfs.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "extent-io-tree.h"
|
||||
#include "btrfs_inode.h"
|
||||
|
@ -57,17 +58,17 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
|
|||
struct extent_io_tree *tree,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
struct inode *inode = tree->private_data;
|
||||
struct btrfs_inode *inode = tree->inode;
|
||||
u64 isize;
|
||||
|
||||
if (!inode)
|
||||
return;
|
||||
|
||||
isize = i_size_read(inode);
|
||||
isize = i_size_read(&inode->vfs_inode);
|
||||
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
|
||||
btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
|
||||
btrfs_debug_rl(inode->root->fs_info,
|
||||
"%s: ino %llu isize %llu odd range [%llu,%llu]",
|
||||
caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
|
||||
caller, btrfs_ino(inode), isize, start, end);
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
@ -93,13 +94,12 @@ struct tree_entry {
|
|||
};
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner,
|
||||
void *private_data)
|
||||
struct extent_io_tree *tree, unsigned int owner)
|
||||
{
|
||||
tree->fs_info = fs_info;
|
||||
tree->state = RB_ROOT;
|
||||
spin_lock_init(&tree->lock);
|
||||
tree->private_data = private_data;
|
||||
tree->inode = NULL;
|
||||
tree->owner = owner;
|
||||
if (owner == IO_TREE_INODE_FILE_EXTENT)
|
||||
lockdep_set_class(&tree->lock, &file_extent_tree_class);
|
||||
|
@ -346,9 +346,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
|
|||
other = prev_state(state);
|
||||
if (other && other->end == state->start - 1 &&
|
||||
other->state == state->state) {
|
||||
if (tree->private_data)
|
||||
btrfs_merge_delalloc_extent(tree->private_data,
|
||||
state, other);
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode, state, other);
|
||||
state->start = other->start;
|
||||
rb_erase(&other->rb_node, &tree->state);
|
||||
RB_CLEAR_NODE(&other->rb_node);
|
||||
|
@ -357,9 +356,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
|
|||
other = next_state(state);
|
||||
if (other && other->start == state->end + 1 &&
|
||||
other->state == state->state) {
|
||||
if (tree->private_data)
|
||||
btrfs_merge_delalloc_extent(tree->private_data, state,
|
||||
other);
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode, state, other);
|
||||
state->end = other->end;
|
||||
rb_erase(&other->rb_node, &tree->state);
|
||||
RB_CLEAR_NODE(&other->rb_node);
|
||||
|
@ -374,8 +372,8 @@ static void set_state_bits(struct extent_io_tree *tree,
|
|||
u32 bits_to_set = bits & ~EXTENT_CTLBITS;
|
||||
int ret;
|
||||
|
||||
if (tree->private_data)
|
||||
btrfs_set_delalloc_extent(tree->private_data, state, bits);
|
||||
if (tree->inode)
|
||||
btrfs_set_delalloc_extent(tree->inode, state, bits);
|
||||
|
||||
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
|
||||
BUG_ON(ret < 0);
|
||||
|
@ -397,7 +395,7 @@ static int insert_state(struct extent_io_tree *tree,
|
|||
u32 bits, struct extent_changeset *changeset)
|
||||
{
|
||||
struct rb_node **node;
|
||||
struct rb_node *parent;
|
||||
struct rb_node *parent = NULL;
|
||||
const u64 end = state->end;
|
||||
|
||||
set_state_bits(tree, state, bits, changeset);
|
||||
|
@ -462,8 +460,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
|
|||
struct rb_node *parent = NULL;
|
||||
struct rb_node **node;
|
||||
|
||||
if (tree->private_data)
|
||||
btrfs_split_delalloc_extent(tree->private_data, orig, split);
|
||||
if (tree->inode)
|
||||
btrfs_split_delalloc_extent(tree->inode, orig, split);
|
||||
|
||||
prealloc->start = orig->start;
|
||||
prealloc->end = split - 1;
|
||||
|
@ -510,8 +508,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
|
|||
u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
|
||||
int ret;
|
||||
|
||||
if (tree->private_data)
|
||||
btrfs_clear_delalloc_extent(tree->private_data, state, bits);
|
||||
if (tree->inode)
|
||||
btrfs_clear_delalloc_extent(tree->inode, state, bits);
|
||||
|
||||
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
|
||||
BUG_ON(ret < 0);
|
||||
|
@ -572,7 +570,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
|||
if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
|
||||
clear = 1;
|
||||
again:
|
||||
if (!prealloc && gfpflags_allow_blocking(mask)) {
|
||||
if (!prealloc) {
|
||||
/*
|
||||
* Don't care for allocation failure here because we might end
|
||||
* up not needing the pre-allocated extent state at all, which
|
||||
|
@ -636,7 +634,8 @@ hit_next:
|
|||
|
||||
if (state->start < start) {
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
@ -657,7 +656,8 @@ hit_next:
|
|||
*/
|
||||
if (state->start <= end && state->end > end) {
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, end + 1);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
@ -714,7 +714,8 @@ static void wait_on_state(struct extent_io_tree *tree,
|
|||
* The range [start, end] is inclusive.
|
||||
* The tree lock is taken by this function
|
||||
*/
|
||||
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
|
||||
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
struct extent_state *state;
|
||||
|
||||
|
@ -722,6 +723,16 @@ void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
|
|||
|
||||
spin_lock(&tree->lock);
|
||||
again:
|
||||
/*
|
||||
* Maintain cached_state, as we may not remove it from the tree if there
|
||||
* are more bits than the bits we're waiting on set on this state.
|
||||
*/
|
||||
if (cached_state && *cached_state) {
|
||||
state = *cached_state;
|
||||
if (extent_state_in_tree(state) &&
|
||||
state->start <= start && start < state->end)
|
||||
goto process_node;
|
||||
}
|
||||
while (1) {
|
||||
/*
|
||||
* This search will find all the extents that end after our
|
||||
|
@ -752,6 +763,12 @@ process_node:
|
|||
}
|
||||
}
|
||||
out:
|
||||
/* This state is no longer useful, clear it and free it up. */
|
||||
if (cached_state && *cached_state) {
|
||||
state = *cached_state;
|
||||
*cached_state = NULL;
|
||||
free_extent_state(state);
|
||||
}
|
||||
spin_unlock(&tree->lock);
|
||||
}
|
||||
|
||||
|
@ -939,13 +956,17 @@ out:
|
|||
* sleeping, so the gfp mask is used to indicate what is allowed.
|
||||
*
|
||||
* If any of the exclusive bits are set, this will fail with -EEXIST if some
|
||||
* part of the range already has the desired bits set. The start of the
|
||||
* existing range is returned in failed_start in this case.
|
||||
* part of the range already has the desired bits set. The extent_state of the
|
||||
* existing range is returned in failed_state in this case, and the start of the
|
||||
* existing range is returned in failed_start. failed_state is used as an
|
||||
* optimization for wait_extent_bit, failed_start must be used as the source of
|
||||
* truth as failed_state may have changed since we returned.
|
||||
*
|
||||
* [start, end] is inclusive This takes the tree lock.
|
||||
*/
|
||||
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, u64 *failed_start,
|
||||
struct extent_state **failed_state,
|
||||
struct extent_state **cached_state,
|
||||
struct extent_changeset *changeset, gfp_t mask)
|
||||
{
|
||||
|
@ -964,9 +985,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
|||
if (exclusive_bits)
|
||||
ASSERT(failed_start);
|
||||
else
|
||||
ASSERT(failed_start == NULL);
|
||||
ASSERT(failed_start == NULL && failed_state == NULL);
|
||||
again:
|
||||
if (!prealloc && gfpflags_allow_blocking(mask)) {
|
||||
if (!prealloc) {
|
||||
/*
|
||||
* Don't care for allocation failure here because we might end
|
||||
* up not needing the pre-allocated extent state at all, which
|
||||
|
@ -991,7 +1012,8 @@ again:
|
|||
state = tree_search_for_insert(tree, start, &p, &parent);
|
||||
if (!state) {
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
prealloc->start = start;
|
||||
prealloc->end = end;
|
||||
insert_state_fast(tree, prealloc, p, parent, bits, changeset);
|
||||
|
@ -1012,6 +1034,7 @@ hit_next:
|
|||
if (state->start == start && state->end <= end) {
|
||||
if (state->state & exclusive_bits) {
|
||||
*failed_start = state->start;
|
||||
cache_state(state, failed_state);
|
||||
err = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1047,6 +1070,7 @@ hit_next:
|
|||
if (state->start < start) {
|
||||
if (state->state & exclusive_bits) {
|
||||
*failed_start = start;
|
||||
cache_state(state, failed_state);
|
||||
err = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1062,7 +1086,8 @@ hit_next:
|
|||
}
|
||||
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
@ -1099,7 +1124,8 @@ hit_next:
|
|||
this_end = last_start - 1;
|
||||
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
|
||||
/*
|
||||
* Avoid to free 'prealloc' if it can be merged with the later
|
||||
|
@ -1125,12 +1151,14 @@ hit_next:
|
|||
if (state->start <= end && state->end > end) {
|
||||
if (state->state & exclusive_bits) {
|
||||
*failed_start = start;
|
||||
cache_state(state, failed_state);
|
||||
err = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
if (!prealloc)
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, end + 1);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
|
@ -1162,8 +1190,8 @@ out:
|
|||
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, struct extent_state **cached_state, gfp_t mask)
|
||||
{
|
||||
return __set_extent_bit(tree, start, end, bits, NULL, cached_state,
|
||||
NULL, mask);
|
||||
return __set_extent_bit(tree, start, end, bits, NULL, NULL,
|
||||
cached_state, NULL, mask);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1397,7 +1425,7 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
|||
u64 *start_ret, u64 *end_ret, u32 bits)
|
||||
{
|
||||
struct extent_state *state;
|
||||
struct extent_state *prev = NULL, *next;
|
||||
struct extent_state *prev = NULL, *next = NULL;
|
||||
|
||||
spin_lock(&tree->lock);
|
||||
|
||||
|
@ -1487,15 +1515,37 @@ out:
|
|||
}
|
||||
|
||||
/*
|
||||
* Count the number of bytes in the tree that have a given bit(s) set. This
|
||||
* can be fairly slow, except for EXTENT_DIRTY which is cached. The total
|
||||
* number found is returned.
|
||||
* Count the number of bytes in the tree that have a given bit(s) set for a
|
||||
* given range.
|
||||
*
|
||||
* @tree: The io tree to search.
|
||||
* @start: The start offset of the range. This value is updated to the
|
||||
* offset of the first byte found with the given bit(s), so it
|
||||
* can end up being bigger than the initial value.
|
||||
* @search_end: The end offset (inclusive value) of the search range.
|
||||
* @max_bytes: The maximum byte count we are interested. The search stops
|
||||
* once it reaches this count.
|
||||
* @bits: The bits the range must have in order to be accounted for.
|
||||
* If multiple bits are set, then only subranges that have all
|
||||
* the bits set are accounted for.
|
||||
* @contig: Indicate if we should ignore holes in the range or not. If
|
||||
* this is true, then stop once we find a hole.
|
||||
* @cached_state: A cached state to be used across multiple calls to this
|
||||
* function in order to speedup searches. Use NULL if this is
|
||||
* called only once or if each call does not start where the
|
||||
* previous one ended.
|
||||
*
|
||||
* Returns the total number of bytes found within the given range that have
|
||||
* all given bits set. If the returned number of bytes is greater than zero
|
||||
* then @start is updated with the offset of the first byte with the bits set.
|
||||
*/
|
||||
u64 count_range_bits(struct extent_io_tree *tree,
|
||||
u64 *start, u64 search_end, u64 max_bytes,
|
||||
u32 bits, int contig)
|
||||
u32 bits, int contig,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
struct extent_state *state;
|
||||
struct extent_state *state = NULL;
|
||||
struct extent_state *cached;
|
||||
u64 cur_start = *start;
|
||||
u64 total_bytes = 0;
|
||||
u64 last = 0;
|
||||
|
@ -1506,11 +1556,41 @@ u64 count_range_bits(struct extent_io_tree *tree,
|
|||
|
||||
spin_lock(&tree->lock);
|
||||
|
||||
if (!cached_state || !*cached_state)
|
||||
goto search;
|
||||
|
||||
cached = *cached_state;
|
||||
|
||||
if (!extent_state_in_tree(cached))
|
||||
goto search;
|
||||
|
||||
if (cached->start <= cur_start && cur_start <= cached->end) {
|
||||
state = cached;
|
||||
} else if (cached->start > cur_start) {
|
||||
struct extent_state *prev;
|
||||
|
||||
/*
|
||||
* The cached state starts after our search range's start. Check
|
||||
* if the previous state record starts at or before the range we
|
||||
* are looking for, and if so, use it - this is a common case
|
||||
* when there are holes between records in the tree. If there is
|
||||
* no previous state record, we can start from our cached state.
|
||||
*/
|
||||
prev = prev_state(cached);
|
||||
if (!prev)
|
||||
state = cached;
|
||||
else if (prev->start <= cur_start && cur_start <= prev->end)
|
||||
state = prev;
|
||||
}
|
||||
|
||||
/*
|
||||
* This search will find all the extents that end after our range
|
||||
* starts.
|
||||
*/
|
||||
search:
|
||||
if (!state)
|
||||
state = tree_search(tree, cur_start);
|
||||
|
||||
while (state) {
|
||||
if (state->start > search_end)
|
||||
break;
|
||||
|
@ -1531,7 +1611,16 @@ u64 count_range_bits(struct extent_io_tree *tree,
|
|||
}
|
||||
state = next_state(state);
|
||||
}
|
||||
|
||||
if (cached_state) {
|
||||
free_extent_state(*cached_state);
|
||||
*cached_state = state;
|
||||
if (state)
|
||||
refcount_inc(&state->refs);
|
||||
}
|
||||
|
||||
spin_unlock(&tree->lock);
|
||||
|
||||
return total_bytes;
|
||||
}
|
||||
|
||||
|
@ -1598,8 +1687,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
|||
*/
|
||||
ASSERT(!(bits & EXTENT_LOCKED));
|
||||
|
||||
return __set_extent_bit(tree, start, end, bits, NULL, NULL, changeset,
|
||||
GFP_NOFS);
|
||||
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
|
||||
changeset, GFP_NOFS);
|
||||
}
|
||||
|
||||
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
|
@ -1615,17 +1704,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
|||
changeset);
|
||||
}
|
||||
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached)
|
||||
{
|
||||
int err;
|
||||
u64 failed_start;
|
||||
|
||||
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
|
||||
NULL, NULL, GFP_NOFS);
|
||||
NULL, cached, NULL, GFP_NOFS);
|
||||
if (err == -EEXIST) {
|
||||
if (failed_start > start)
|
||||
clear_extent_bit(tree, start, failed_start - 1,
|
||||
EXTENT_LOCKED, NULL);
|
||||
EXTENT_LOCKED, cached);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
|
@ -1638,20 +1728,22 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
|||
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
struct extent_state *failed_state = NULL;
|
||||
int err;
|
||||
u64 failed_start;
|
||||
|
||||
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
|
||||
cached_state, NULL, GFP_NOFS);
|
||||
&failed_state, cached_state, NULL, GFP_NOFS);
|
||||
while (err == -EEXIST) {
|
||||
if (failed_start != start)
|
||||
clear_extent_bit(tree, start, failed_start - 1,
|
||||
EXTENT_LOCKED, cached_state);
|
||||
|
||||
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
|
||||
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
|
||||
&failed_state);
|
||||
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
|
||||
&failed_start, cached_state, NULL,
|
||||
GFP_NOFS);
|
||||
&failed_start, &failed_state,
|
||||
cached_state, NULL, GFP_NOFS);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
|
|
@ -3,43 +3,48 @@
|
|||
#ifndef BTRFS_EXTENT_IO_TREE_H
|
||||
#define BTRFS_EXTENT_IO_TREE_H
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
struct extent_changeset;
|
||||
struct io_failure_record;
|
||||
|
||||
/* Bits for the extent state */
|
||||
#define EXTENT_DIRTY (1U << 0)
|
||||
#define EXTENT_UPTODATE (1U << 1)
|
||||
#define EXTENT_LOCKED (1U << 2)
|
||||
#define EXTENT_NEW (1U << 3)
|
||||
#define EXTENT_DELALLOC (1U << 4)
|
||||
#define EXTENT_DEFRAG (1U << 5)
|
||||
#define EXTENT_BOUNDARY (1U << 6)
|
||||
#define EXTENT_NODATASUM (1U << 7)
|
||||
#define EXTENT_CLEAR_META_RESV (1U << 8)
|
||||
#define EXTENT_NEED_WAIT (1U << 9)
|
||||
#define EXTENT_NORESERVE (1U << 11)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 12)
|
||||
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
|
||||
/*
|
||||
* Must be cleared only during ordered extent completion or on error paths if we
|
||||
* did not manage to submit bios and create the ordered extents for the range.
|
||||
* Should not be cleared during page release and page invalidation (if there is
|
||||
* an ordered extent in flight), that is left for the ordered extent completion.
|
||||
enum {
|
||||
ENUM_BIT(EXTENT_DIRTY),
|
||||
ENUM_BIT(EXTENT_UPTODATE),
|
||||
ENUM_BIT(EXTENT_LOCKED),
|
||||
ENUM_BIT(EXTENT_NEW),
|
||||
ENUM_BIT(EXTENT_DELALLOC),
|
||||
ENUM_BIT(EXTENT_DEFRAG),
|
||||
ENUM_BIT(EXTENT_BOUNDARY),
|
||||
ENUM_BIT(EXTENT_NODATASUM),
|
||||
ENUM_BIT(EXTENT_CLEAR_META_RESV),
|
||||
ENUM_BIT(EXTENT_NEED_WAIT),
|
||||
ENUM_BIT(EXTENT_NORESERVE),
|
||||
ENUM_BIT(EXTENT_QGROUP_RESERVED),
|
||||
ENUM_BIT(EXTENT_CLEAR_DATA_RESV),
|
||||
/*
|
||||
* Must be cleared only during ordered extent completion or on error
|
||||
* paths if we did not manage to submit bios and create the ordered
|
||||
* extents for the range. Should not be cleared during page release
|
||||
* and page invalidation (if there is an ordered extent in flight),
|
||||
* that is left for the ordered extent completion.
|
||||
*/
|
||||
#define EXTENT_DELALLOC_NEW (1U << 14)
|
||||
/*
|
||||
* When an ordered extent successfully completes for a region marked as a new
|
||||
* delalloc range, use this flag when clearing a new delalloc range to indicate
|
||||
* that the VFS' inode number of bytes should be incremented and the inode's new
|
||||
* delalloc bytes decremented, in an atomic way to prevent races with stat(2).
|
||||
ENUM_BIT(EXTENT_DELALLOC_NEW),
|
||||
/*
|
||||
* When an ordered extent successfully completes for a region marked as
|
||||
* a new delalloc range, use this flag when clearing a new delalloc
|
||||
* range to indicate that the VFS' inode number of bytes should be
|
||||
* incremented and the inode's new delalloc bytes decremented, in an
|
||||
* atomic way to prevent races with stat(2).
|
||||
*/
|
||||
#define EXTENT_ADD_INODE_BYTES (1U << 15)
|
||||
|
||||
/*
|
||||
* Set during truncate when we're clearing an entire range and we just want the
|
||||
* extent states to go away.
|
||||
ENUM_BIT(EXTENT_ADD_INODE_BYTES),
|
||||
/*
|
||||
* Set during truncate when we're clearing an entire range and we just
|
||||
* want the extent states to go away.
|
||||
*/
|
||||
#define EXTENT_CLEAR_ALL_BITS (1U << 16)
|
||||
ENUM_BIT(EXTENT_CLEAR_ALL_BITS),
|
||||
};
|
||||
|
||||
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
|
||||
EXTENT_CLEAR_DATA_RESV)
|
||||
|
@ -75,7 +80,8 @@ enum {
|
|||
struct extent_io_tree {
|
||||
struct rb_root state;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
void *private_data;
|
||||
/* Inode associated with this tree, or NULL. */
|
||||
struct btrfs_inode *inode;
|
||||
|
||||
/* Who owns this io tree, should be one of IO_TREE_* */
|
||||
u8 owner;
|
||||
|
@ -99,21 +105,22 @@ struct extent_state {
|
|||
};
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner,
|
||||
void *private_data);
|
||||
struct extent_io_tree *tree, unsigned int owner);
|
||||
void extent_io_tree_release(struct extent_io_tree *tree);
|
||||
|
||||
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached);
|
||||
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached);
|
||||
|
||||
int __init extent_state_init_cachep(void);
|
||||
void __cold extent_state_free_cachep(void);
|
||||
|
||||
u64 count_range_bits(struct extent_io_tree *tree,
|
||||
u64 *start, u64 search_end,
|
||||
u64 max_bytes, u32 bits, int contig);
|
||||
u64 max_bytes, u32 bits, int contig,
|
||||
struct extent_state **cached_state);
|
||||
|
||||
void free_extent_state(struct extent_state *state);
|
||||
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
|
@ -139,13 +146,6 @@ static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
|||
GFP_NOFS, NULL);
|
||||
}
|
||||
|
||||
static inline int unlock_extent_atomic(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached)
|
||||
{
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
|
||||
GFP_ATOMIC, NULL);
|
||||
}
|
||||
|
||||
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, u32 bits)
|
||||
{
|
||||
|
@ -217,13 +217,6 @@ static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
|
|||
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
|
||||
}
|
||||
|
||||
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached_state, gfp_t mask)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, EXTENT_UPTODATE,
|
||||
cached_state, mask);
|
||||
}
|
||||
|
||||
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, u32 bits,
|
||||
struct extent_state **cached_state);
|
||||
|
@ -234,6 +227,7 @@ int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
|||
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
|
||||
u64 *end, u64 max_bytes,
|
||||
struct extent_state **cached_state);
|
||||
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits);
|
||||
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
|
||||
struct extent_state **cached_state);
|
||||
|
||||
#endif /* BTRFS_EXTENT_IO_TREE_H */
|
||||
|
|
|
@ -36,6 +36,13 @@
|
|||
#include "rcu-string.h"
|
||||
#include "zoned.h"
|
||||
#include "dev-replace.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
#include "file-item.h"
|
||||
#include "orphan.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
#undef SCRAMBLE_DELAYED_REFS
|
||||
|
||||
|
@ -5255,8 +5262,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
|
|||
u64 bytenr;
|
||||
u64 generation;
|
||||
u64 parent;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key first_key;
|
||||
struct btrfs_ref ref = { 0 };
|
||||
struct extent_buffer *next;
|
||||
int level = wc->level;
|
||||
|
@ -5278,7 +5285,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
|
||||
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
|
||||
|
||||
check.level = level - 1;
|
||||
check.transid = generation;
|
||||
check.owner_root = root->root_key.objectid;
|
||||
check.has_first_key = true;
|
||||
btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
|
||||
path->slots[level]);
|
||||
|
||||
next = find_extent_buffer(fs_info, bytenr);
|
||||
|
@ -5340,8 +5352,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
|
|||
if (!next) {
|
||||
if (reada && level == 1)
|
||||
reada_walk_down(trans, root, wc, path);
|
||||
next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
|
||||
generation, level - 1, &first_key);
|
||||
next = read_tree_block(fs_info, bytenr, &check);
|
||||
if (IS_ERR(next)) {
|
||||
return PTR_ERR(next);
|
||||
} else if (!extent_buffer_uptodate(next)) {
|
||||
|
@ -5973,40 +5984,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to account the unused space of all the readonly block group in the
|
||||
* space_info. takes mirrors into account.
|
||||
*/
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 free_bytes = 0;
|
||||
int factor;
|
||||
|
||||
/* It's df, we don't care if it's racy */
|
||||
if (list_empty(&sinfo->ro_bgs))
|
||||
return 0;
|
||||
|
||||
spin_lock(&sinfo->lock);
|
||||
list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
if (!block_group->ro) {
|
||||
spin_unlock(&block_group->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
factor = btrfs_bg_type_to_factor(block_group->flags);
|
||||
free_bytes += (block_group->length -
|
||||
block_group->used) * factor;
|
||||
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
spin_unlock(&sinfo->lock);
|
||||
|
||||
return free_bytes;
|
||||
}
|
||||
|
||||
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
|
@ -6072,7 +6049,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
|
|||
btrfs_warn_in_rcu(fs_info,
|
||||
"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
|
||||
start, end - start + 1,
|
||||
rcu_str_deref(device->name),
|
||||
btrfs_dev_name(device),
|
||||
device->total_bytes);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
ret = 0;
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_EXTENT_TREE_H
|
||||
#define BTRFS_EXTENT_TREE_H
|
||||
|
||||
enum btrfs_inline_ref_type {
|
||||
BTRFS_REF_TYPE_INVALID,
|
||||
BTRFS_REF_TYPE_BLOCK,
|
||||
BTRFS_REF_TYPE_DATA,
|
||||
BTRFS_REF_TYPE_ANY,
|
||||
};
|
||||
|
||||
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
|
||||
struct btrfs_extent_inline_ref *iref,
|
||||
enum btrfs_inline_ref_type is_data);
|
||||
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 num_bytes);
|
||||
void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
|
||||
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
|
||||
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
|
||||
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 offset, int metadata, u64 *refs, u64 *flags);
|
||||
int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
|
||||
int reserved);
|
||||
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes);
|
||||
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root,
|
||||
u64 objectid, u64 offset, u64 bytenr, bool strict,
|
||||
struct btrfs_path *path);
|
||||
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 parent, u64 root_objectid,
|
||||
const struct btrfs_disk_key *key,
|
||||
int level, u64 hint,
|
||||
u64 empty_size,
|
||||
enum btrfs_lock_nesting nest);
|
||||
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
u64 root_id,
|
||||
struct extent_buffer *buf,
|
||||
u64 parent, int last_ref);
|
||||
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 owner,
|
||||
u64 offset, u64 ram_bytes,
|
||||
struct btrfs_key *ins);
|
||||
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
|
||||
u64 root_objectid, u64 owner, u64 offset,
|
||||
struct btrfs_key *ins);
|
||||
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
|
||||
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
|
||||
struct btrfs_key *ins, int is_data, int delalloc);
|
||||
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct extent_buffer *buf, int full_backref);
|
||||
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct extent_buffer *buf, int full_backref);
|
||||
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *eb, u64 flags, int level);
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
|
||||
|
||||
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len, int delalloc);
|
||||
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len);
|
||||
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
|
||||
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref);
|
||||
int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
|
||||
int for_reloc);
|
||||
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *node,
|
||||
struct extent_buffer *parent);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -9,6 +9,7 @@
|
|||
#include <linux/btrfs_tree.h>
|
||||
#include "compression.h"
|
||||
#include "ulist.h"
|
||||
#include "misc.h"
|
||||
|
||||
enum {
|
||||
EXTENT_BUFFER_UPTODATE,
|
||||
|
@ -29,13 +30,15 @@ enum {
|
|||
};
|
||||
|
||||
/* these are flags for __process_pages_contig */
|
||||
#define PAGE_UNLOCK (1 << 0)
|
||||
/* Page starts writeback, clear dirty bit and set writeback bit */
|
||||
#define PAGE_START_WRITEBACK (1 << 1)
|
||||
#define PAGE_END_WRITEBACK (1 << 2)
|
||||
#define PAGE_SET_ORDERED (1 << 3)
|
||||
#define PAGE_SET_ERROR (1 << 4)
|
||||
#define PAGE_LOCK (1 << 5)
|
||||
enum {
|
||||
ENUM_BIT(PAGE_UNLOCK),
|
||||
/* Page starts writeback, clear dirty bit and set writeback bit */
|
||||
ENUM_BIT(PAGE_START_WRITEBACK),
|
||||
ENUM_BIT(PAGE_END_WRITEBACK),
|
||||
ENUM_BIT(PAGE_SET_ORDERED),
|
||||
ENUM_BIT(PAGE_SET_ERROR),
|
||||
ENUM_BIT(PAGE_LOCK),
|
||||
};
|
||||
|
||||
/*
|
||||
* page->private values. Every page that is controlled by the extent
|
||||
|
@ -63,17 +66,11 @@ struct btrfs_inode;
|
|||
struct btrfs_fs_info;
|
||||
struct io_failure_record;
|
||||
struct extent_io_tree;
|
||||
struct btrfs_tree_parent_check;
|
||||
|
||||
int __init extent_buffer_init_cachep(void);
|
||||
void __cold extent_buffer_free_cachep(void);
|
||||
|
||||
typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
|
||||
int mirror_num,
|
||||
enum btrfs_compression_type compress_type);
|
||||
|
||||
typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode,
|
||||
struct bio *bio, u64 dio_file_offset);
|
||||
|
||||
#define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
|
||||
struct extent_buffer {
|
||||
u64 start;
|
||||
|
@ -98,6 +95,39 @@ struct extent_buffer {
|
|||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Get the correct offset inside the page of extent buffer.
|
||||
*
|
||||
* @eb: target extent buffer
|
||||
* @start: offset inside the extent buffer
|
||||
*
|
||||
* Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
|
||||
*/
|
||||
static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb,
|
||||
unsigned long offset)
|
||||
{
|
||||
/*
|
||||
* For sectorsize == PAGE_SIZE case, eb->start will always be aligned
|
||||
* to PAGE_SIZE, thus adding it won't cause any difference.
|
||||
*
|
||||
* For sectorsize < PAGE_SIZE, we must only read the data that belongs
|
||||
* to the eb, thus we have to take the eb->start into consideration.
|
||||
*/
|
||||
return offset_in_page(offset + eb->start);
|
||||
}
|
||||
|
||||
static inline unsigned long get_eb_page_index(unsigned long offset)
|
||||
{
|
||||
/*
|
||||
* For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough.
|
||||
*
|
||||
* For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE,
|
||||
* and have ensured that all tree blocks are contained in one page,
|
||||
* thus we always get index == 0.
|
||||
*/
|
||||
return offset >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Structure to record how many bytes and which ranges are set/cleared
|
||||
*/
|
||||
|
@ -174,8 +204,8 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
|
|||
#define WAIT_NONE 0
|
||||
#define WAIT_COMPLETE 1
|
||||
#define WAIT_PAGE_LOCK 2
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
|
||||
int mirror_num);
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
||||
struct btrfs_tree_parent_check *parent_check);
|
||||
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
|
||||
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 owner_root, u64 gen, int level);
|
||||
|
@ -248,7 +278,6 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
|
|||
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
|
||||
|
||||
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
|
||||
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
|
||||
|
||||
/*
|
||||
* When IO fails, either with EIO or csum verification fails, we
|
||||
|
@ -272,9 +301,9 @@ struct io_failure_record {
|
|||
int num_copies;
|
||||
};
|
||||
|
||||
int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
|
||||
int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio,
|
||||
u32 bio_offset, struct page *page, unsigned int pgoff,
|
||||
submit_bio_hook_t *submit_bio_hook);
|
||||
bool submit_buffered);
|
||||
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
|
||||
struct page *page, unsigned int pg_offset);
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "volumes.h"
|
||||
#include "extent_map.h"
|
||||
|
@ -27,12 +28,9 @@ void __cold extent_map_exit(void)
|
|||
kmem_cache_destroy(extent_map_cache);
|
||||
}
|
||||
|
||||
/**
|
||||
* extent_map_tree_init - initialize extent map tree
|
||||
* @tree: tree to initialize
|
||||
*
|
||||
* Initialize the extent tree @tree. Should be called for each new inode
|
||||
* or other user of the extent_map interface.
|
||||
/*
|
||||
* Initialize the extent tree @tree. Should be called for each new inode or
|
||||
* other user of the extent_map interface.
|
||||
*/
|
||||
void extent_map_tree_init(struct extent_map_tree *tree)
|
||||
{
|
||||
|
@ -41,12 +39,9 @@ void extent_map_tree_init(struct extent_map_tree *tree)
|
|||
rwlock_init(&tree->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* alloc_extent_map - allocate new extent map structure
|
||||
*
|
||||
* Allocate a new extent_map structure. The new structure is
|
||||
* returned with a reference count of one and needs to be
|
||||
* freed using free_extent_map()
|
||||
/*
|
||||
* Allocate a new extent_map structure. The new structure is returned with a
|
||||
* reference count of one and needs to be freed using free_extent_map()
|
||||
*/
|
||||
struct extent_map *alloc_extent_map(void)
|
||||
{
|
||||
|
@ -61,12 +56,9 @@ struct extent_map *alloc_extent_map(void)
|
|||
return em;
|
||||
}
|
||||
|
||||
/**
|
||||
* free_extent_map - drop reference count of an extent_map
|
||||
* @em: extent map being released
|
||||
*
|
||||
* Drops the reference out on @em by one and free the structure
|
||||
* if the reference count hits zero.
|
||||
/*
|
||||
* Drop the reference out on @em by one and free the structure if the reference
|
||||
* count hits zero.
|
||||
*/
|
||||
void free_extent_map(struct extent_map *em)
|
||||
{
|
||||
|
@ -81,7 +73,7 @@ void free_extent_map(struct extent_map *em)
|
|||
}
|
||||
}
|
||||
|
||||
/* simple helper to do math around the end of an extent, handling wrap */
|
||||
/* Do the math around the end of an extent, handling wrapping. */
|
||||
static u64 range_end(u64 start, u64 len)
|
||||
{
|
||||
if (start + len < start)
|
||||
|
@ -137,8 +129,8 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
|
|||
}
|
||||
|
||||
/*
|
||||
* search through the tree for an extent_map with a given offset. If
|
||||
* it can't be found, try to find some neighboring extents
|
||||
* Search through the tree for an extent_map with a given offset. If it can't
|
||||
* be found, try to find some neighboring extents
|
||||
*/
|
||||
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
struct rb_node **prev_or_next_ret)
|
||||
|
@ -190,7 +182,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* check to see if two extent_map structs are adjacent and safe to merge */
|
||||
/* Check to see if two extent_map structs are adjacent and safe to merge. */
|
||||
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
||||
{
|
||||
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
|
||||
|
@ -288,8 +280,9 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unpin_extent_cache - unpin an extent from the cache
|
||||
/*
|
||||
* Unpin an extent from the cache.
|
||||
*
|
||||
* @tree: tree to unpin the extent in
|
||||
* @start: logical offset in the file
|
||||
* @len: length of the extent
|
||||
|
@ -392,7 +385,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Add new extent map to the extent tree
|
||||
*
|
||||
* @tree: tree to insert new map in
|
||||
|
@ -451,8 +444,9 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
|
|||
return em;
|
||||
}
|
||||
|
||||
/**
|
||||
* lookup_extent_mapping - lookup extent_map
|
||||
/*
|
||||
* Lookup extent_map that intersects @start + @len range.
|
||||
*
|
||||
* @tree: tree to lookup in
|
||||
* @start: byte offset to start the search
|
||||
* @len: length of the lookup range
|
||||
|
@ -468,8 +462,9 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
|
|||
return __lookup_extent_mapping(tree, start, len, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* search_extent_mapping - find a nearby extent map
|
||||
/*
|
||||
* Find a nearby extent map intersecting @start + @len (not an exact search).
|
||||
*
|
||||
* @tree: tree to lookup in
|
||||
* @start: byte offset to start the search
|
||||
* @len: length of the lookup range
|
||||
|
@ -485,13 +480,14 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
|
|||
return __lookup_extent_mapping(tree, start, len, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* remove_extent_mapping - removes an extent_map from the extent tree
|
||||
/*
|
||||
* Remove an extent_map from the extent tree.
|
||||
*
|
||||
* @tree: extent tree to remove from
|
||||
* @em: extent map being removed
|
||||
*
|
||||
* Removes @em from @tree. No reference counts are dropped, and no checks
|
||||
* are done to see if the range is in use
|
||||
* Remove @em from @tree. No reference counts are dropped, and no checks
|
||||
* are done to see if the range is in use.
|
||||
*/
|
||||
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
|
||||
{
|
||||
|
@ -523,7 +519,7 @@ void replace_extent_mapping(struct extent_map_tree *tree,
|
|||
setup_extent_mapping(tree, new, modified);
|
||||
}
|
||||
|
||||
static struct extent_map *next_extent_map(struct extent_map *em)
|
||||
static struct extent_map *next_extent_map(const struct extent_map *em)
|
||||
{
|
||||
struct rb_node *next;
|
||||
|
||||
|
@ -585,8 +581,8 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
|
|||
return add_extent_mapping(em_tree, em, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add extent mapping into em_tree
|
||||
/*
|
||||
* Add extent mapping into em_tree.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @em_tree: extent tree into which we want to insert the extent mapping
|
||||
|
@ -613,6 +609,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
|
|||
int ret;
|
||||
struct extent_map *em = *em_in;
|
||||
|
||||
/*
|
||||
* Tree-checker should have rejected any inline extent with non-zero
|
||||
* file offset. Here just do a sanity check.
|
||||
*/
|
||||
if (em->block_start == EXTENT_MAP_INLINE)
|
||||
ASSERT(em->start == 0);
|
||||
|
||||
ret = add_extent_mapping(em_tree, em, 0);
|
||||
/* it is possible that someone inserted the extent into the tree
|
||||
* while we had the lock dropped. It is also possible that
|
||||
|
|
|
@ -9,13 +9,18 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "messages.h"
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "volumes.h"
|
||||
#include "bio.h"
|
||||
#include "print-tree.h"
|
||||
#include "compression.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
#include "super.h"
|
||||
|
||||
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
|
||||
sizeof(struct btrfs_item) * 2) / \
|
||||
|
@ -24,8 +29,8 @@
|
|||
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
|
||||
PAGE_SIZE))
|
||||
|
||||
/**
|
||||
* Set inode's size according to filesystem options
|
||||
/*
|
||||
* Set inode's size according to filesystem options.
|
||||
*
|
||||
* @inode: inode we want to update the disk_i_size for
|
||||
* @new_i_size: i_size we want to set to, 0 if we use i_size
|
||||
|
@ -64,8 +69,8 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
|
|||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark range within a file as having a new extent inserted
|
||||
/*
|
||||
* Mark range within a file as having a new extent inserted.
|
||||
*
|
||||
* @inode: inode being modified
|
||||
* @start: start file offset of the file extent we've inserted
|
||||
|
@ -92,8 +97,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
|
|||
EXTENT_DIRTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks an inode range as not having a backing extent
|
||||
/*
|
||||
* Mark an inode range as not having a backing extent.
|
||||
*
|
||||
* @inode: inode being modified
|
||||
* @start: start file offset of the file extent we've inserted
|
||||
|
@ -121,12 +126,26 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
|||
start + len - 1, EXTENT_DIRTY, NULL);
|
||||
}
|
||||
|
||||
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
|
||||
u16 csum_size)
|
||||
static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes)
|
||||
{
|
||||
u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size;
|
||||
ASSERT(IS_ALIGNED(bytes, fs_info->sectorsize));
|
||||
|
||||
return ncsums * fs_info->sectorsize;
|
||||
return (bytes >> fs_info->sectorsize_bits) * fs_info->csum_size;
|
||||
}
|
||||
|
||||
/*
 * Convert a length of checksum bytes into the length of data covered by those
 * checksums: every fs_info->csum_size bytes of checksum stand for one sector
 * (1 << fs_info->sectorsize_bits bytes) of data.
 *
 * @csum_size is asserted to be a multiple of fs_info->csum_size.
 */
static size_t csum_size_to_bytes(const struct btrfs_fs_info *fs_info, u32 csum_size)
{
	u32 nr_csums;

	ASSERT(IS_ALIGNED(csum_size, fs_info->csum_size));

	nr_csums = csum_size / fs_info->csum_size;
	return nr_csums << fs_info->sectorsize_bits;
}
|
||||
|
||||
static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u32 max_csum_size = round_down(PAGE_SIZE - sizeof(struct btrfs_ordered_sum),
|
||||
fs_info->csum_size);
|
||||
|
||||
return csum_size_to_bytes(fs_info, max_csum_size);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -135,9 +154,7 @@ static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
|
|||
*/
|
||||
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
|
||||
{
|
||||
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
|
||||
|
||||
return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
|
||||
return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
|
||||
}
|
||||
|
||||
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
||||
|
@ -254,7 +271,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
|||
|
||||
/*
|
||||
* Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and
|
||||
* estore the result to @dst.
|
||||
* store the result to @dst.
|
||||
*
|
||||
* Return >0 for the number of sectors we found.
|
||||
* Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum
|
||||
|
@ -360,7 +377,7 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Lookup the checksum for the read bio in csum tree.
|
||||
*
|
||||
* @inode: inode that the bio is for.
|
||||
|
@ -510,7 +527,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
|||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct list_head *list, int search_commit,
|
||||
bool nowait)
|
||||
{
|
||||
|
@ -521,11 +538,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
struct btrfs_ordered_sum *sums;
|
||||
struct btrfs_csum_item *item;
|
||||
LIST_HEAD(tmplist);
|
||||
unsigned long offset;
|
||||
int ret;
|
||||
size_t size;
|
||||
u64 csum_end;
|
||||
const u32 csum_size = fs_info->csum_size;
|
||||
|
||||
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||
IS_ALIGNED(end + 1, fs_info->sectorsize));
|
||||
|
@ -551,16 +564,33 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
if (ret > 0 && path->slots[0] > 0) {
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
|
||||
|
||||
/*
|
||||
* There are two cases we can hit here for the previous csum
|
||||
* item:
|
||||
*
|
||||
* |<- search range ->|
|
||||
* |<- csum item ->|
|
||||
*
|
||||
* Or
|
||||
* |<- search range ->|
|
||||
* |<- csum item ->|
|
||||
*
|
||||
* Check if the previous csum item covers the leading part of
|
||||
* the search range. If so we have to start from previous csum
|
||||
* item.
|
||||
*/
|
||||
if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
|
||||
key.type == BTRFS_EXTENT_CSUM_KEY) {
|
||||
offset = (start - key.offset) >> fs_info->sectorsize_bits;
|
||||
if (offset * csum_size <
|
||||
if (bytes_to_csum_size(fs_info, start - key.offset) <
|
||||
btrfs_item_size(leaf, path->slots[0] - 1))
|
||||
path->slots[0]--;
|
||||
}
|
||||
}
|
||||
|
||||
while (start <= end) {
|
||||
u64 csum_end;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
|
@ -580,8 +610,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
if (key.offset > start)
|
||||
start = key.offset;
|
||||
|
||||
size = btrfs_item_size(leaf, path->slots[0]);
|
||||
csum_end = key.offset + (size / csum_size) * fs_info->sectorsize;
|
||||
csum_end = key.offset + csum_size_to_bytes(fs_info,
|
||||
btrfs_item_size(leaf, path->slots[0]));
|
||||
if (csum_end <= start) {
|
||||
path->slots[0]++;
|
||||
continue;
|
||||
|
@ -591,8 +621,11 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_csum_item);
|
||||
while (start < csum_end) {
|
||||
unsigned long offset;
|
||||
size_t size;
|
||||
|
||||
size = min_t(size_t, csum_end - start,
|
||||
max_ordered_sum_bytes(fs_info, csum_size));
|
||||
max_ordered_sum_bytes(fs_info));
|
||||
sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
|
||||
GFP_NOFS);
|
||||
if (!sums) {
|
||||
|
@ -603,16 +636,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
sums->bytenr = start;
|
||||
sums->len = (int)size;
|
||||
|
||||
offset = (start - key.offset) >> fs_info->sectorsize_bits;
|
||||
offset *= csum_size;
|
||||
size >>= fs_info->sectorsize_bits;
|
||||
offset = bytes_to_csum_size(fs_info, start - key.offset);
|
||||
|
||||
read_extent_buffer(path->nodes[0],
|
||||
sums->sums,
|
||||
((unsigned long)item) + offset,
|
||||
csum_size * size);
|
||||
bytes_to_csum_size(fs_info, size));
|
||||
|
||||
start += fs_info->sectorsize * size;
|
||||
start += size;
|
||||
list_add_tail(&sums->list, &tmplist);
|
||||
}
|
||||
path->slots[0]++;
|
||||
|
@ -630,8 +661,129 @@ fail:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate checksums of the data contained inside a bio
|
||||
/*
|
||||
* Do the same work as btrfs_lookup_csums_list(), the difference is in how
|
||||
* we return the result.
|
||||
*
|
||||
* This version will set the corresponding bits in @csum_bitmap to represent
|
||||
* that there is a csum found.
|
||||
* Each bit represents a sector. Thus caller should ensure @csum_buf passed
|
||||
* in is large enough to contain all csums.
|
||||
*/
|
||||
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
|
||||
u8 *csum_buf, unsigned long *csum_bitmap)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_csum_item *item;
|
||||
const u64 orig_start = start;
|
||||
int ret;
|
||||
|
||||
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||
IS_ALIGNED(end + 1, fs_info->sectorsize));
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
key.offset = start;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
if (ret > 0 && path->slots[0] > 0) {
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
|
||||
|
||||
/*
|
||||
* There are two cases we can hit here for the previous csum
|
||||
* item:
|
||||
*
|
||||
* |<- search range ->|
|
||||
* |<- csum item ->|
|
||||
*
|
||||
* Or
|
||||
* |<- search range ->|
|
||||
* |<- csum item ->|
|
||||
*
|
||||
* Check if the previous csum item covers the leading part of
|
||||
* the search range. If so we have to start from previous csum
|
||||
* item.
|
||||
*/
|
||||
if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
|
||||
key.type == BTRFS_EXTENT_CSUM_KEY) {
|
||||
if (bytes_to_csum_size(fs_info, start - key.offset) <
|
||||
btrfs_item_size(leaf, path->slots[0] - 1))
|
||||
path->slots[0]--;
|
||||
}
|
||||
}
|
||||
|
||||
while (start <= end) {
|
||||
u64 csum_end;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
if (ret > 0)
|
||||
break;
|
||||
leaf = path->nodes[0];
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
|
||||
key.type != BTRFS_EXTENT_CSUM_KEY ||
|
||||
key.offset > end)
|
||||
break;
|
||||
|
||||
if (key.offset > start)
|
||||
start = key.offset;
|
||||
|
||||
csum_end = key.offset + csum_size_to_bytes(fs_info,
|
||||
btrfs_item_size(leaf, path->slots[0]));
|
||||
if (csum_end <= start) {
|
||||
path->slots[0]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
csum_end = min(csum_end, end + 1);
|
||||
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_csum_item);
|
||||
while (start < csum_end) {
|
||||
unsigned long offset;
|
||||
size_t size;
|
||||
u8 *csum_dest = csum_buf + bytes_to_csum_size(fs_info,
|
||||
start - orig_start);
|
||||
|
||||
size = min_t(size_t, csum_end - start, end + 1 - start);
|
||||
|
||||
offset = bytes_to_csum_size(fs_info, start - key.offset);
|
||||
|
||||
read_extent_buffer(path->nodes[0], csum_dest,
|
||||
((unsigned long)item) + offset,
|
||||
bytes_to_csum_size(fs_info, size));
|
||||
|
||||
bitmap_set(csum_bitmap,
|
||||
(start - orig_start) >> fs_info->sectorsize_bits,
|
||||
size >> fs_info->sectorsize_bits);
|
||||
|
||||
start += size;
|
||||
}
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = 0;
|
||||
fail:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate checksums of the data contained inside a bio.
|
||||
*
|
||||
* @inode: Owner of the data inside the bio
|
||||
* @bio: Contains the data to be checksummed
|
||||
|
@ -746,15 +898,16 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
|||
}
|
||||
|
||||
/*
|
||||
* helper function for csum removal, this expects the
|
||||
* key to describe the csum pointed to by the path, and it expects
|
||||
* the csum to overlap the range [bytenr, len]
|
||||
* Remove one checksum overlapping a range.
|
||||
*
|
||||
* The csum should not be entirely contained in the range and the
|
||||
* range should not be entirely contained in the csum.
|
||||
* This expects the key to describe the csum pointed to by the path, and it
|
||||
* expects the csum to overlap the range [bytenr, len]
|
||||
*
|
||||
* This calls btrfs_truncate_item with the correct args based on the
|
||||
* overlap, and fixes up the key as required.
|
||||
* The csum should not be entirely contained in the range and the range should
|
||||
* not be entirely contained in the csum.
|
||||
*
|
||||
* This calls btrfs_truncate_item with the correct args based on the overlap,
|
||||
* and fixes up the key as required.
|
||||
*/
|
||||
static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
|
@ -803,8 +956,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
|
||||
/*
|
||||
* deletes the csum items from the csum tree for a given
|
||||
* range of bytes.
|
||||
* Delete the csum items from the csum tree for a given range of bytes.
|
||||
*/
|
||||
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 bytenr, u64 len)
|
||||
|
@ -1209,7 +1361,6 @@ out:
|
|||
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
const struct btrfs_path *path,
|
||||
struct btrfs_file_extent_item *fi,
|
||||
const bool new_inline,
|
||||
struct extent_map *em)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
|
@ -1261,10 +1412,9 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
|||
*/
|
||||
em->orig_start = EXTENT_MAP_HOLE;
|
||||
em->block_len = (u64)-1;
|
||||
if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
em->compress_type = compress_type;
|
||||
}
|
||||
if (compress_type != BTRFS_COMPRESS_NONE)
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
} else {
|
||||
btrfs_err(fs_info,
|
||||
"unknown file extent item type %d, inode %llu, offset %llu, "
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_FILE_ITEM_H
|
||||
#define BTRFS_FILE_ITEM_H
|
||||
|
||||
#include "accessors.h"
|
||||
|
||||
#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
|
||||
(offsetof(struct btrfs_file_extent_item, disk_bytenr))
|
||||
|
||||
static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
|
||||
{
|
||||
return BTRFS_MAX_ITEM_SIZE(info) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of bytes used by the item on disk, minus the size of any
|
||||
* extent headers. If a file is compressed on disk, this is the compressed
|
||||
* size.
|
||||
*/
|
||||
static inline u32 btrfs_file_extent_inline_item_len(
|
||||
const struct extent_buffer *eb,
|
||||
int nr)
|
||||
{
|
||||
return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
||||
}
|
||||
|
||||
static inline unsigned long btrfs_file_extent_inline_start(
|
||||
const struct btrfs_file_extent_item *e)
|
||||
{
|
||||
return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
|
||||
}
|
||||
|
||||
/*
 * Item size needed to store @datasize bytes of inline data: the data plus the
 * BTRFS_FILE_EXTENT_INLINE_DATA_START bytes that precede it in the item.
 */
static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
{
	const u32 header_len = BTRFS_FILE_EXTENT_INLINE_DATA_START;

	return header_len + datasize;
}
|
||||
|
||||
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 bytenr, u64 len);
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
|
||||
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 objectid, u64 pos,
|
||||
u64 num_bytes);
|
||||
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid,
|
||||
u64 bytenr, int mod);
|
||||
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_ordered_sum *sums);
|
||||
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
u64 offset, bool one_ordered);
|
||||
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct list_head *list, int search_commit,
|
||||
bool nowait);
|
||||
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
|
||||
u8 *csum_buf, unsigned long *csum_bitmap);
|
||||
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
const struct btrfs_path *path,
|
||||
struct btrfs_file_extent_item *fi,
|
||||
struct extent_map *em);
|
||||
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len);
|
||||
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, u64 len);
|
||||
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size);
|
||||
u64 btrfs_file_extent_end(const struct btrfs_path *path);
|
||||
|
||||
#endif
|
609
fs/btrfs/file.c
609
fs/btrfs/file.c
|
@ -30,329 +30,13 @@
|
|||
#include "delalloc-space.h"
|
||||
#include "reflink.h"
|
||||
#include "subpage.h"
|
||||
|
||||
static struct kmem_cache *btrfs_inode_defrag_cachep;
|
||||
/*
 * When auto defrag is enabled, one of these records is queued for every inode
 * that needs a defragging pass.  The code in this file keeps the records in
 * the fs_info->defrag_inodes rbtree, ordered by (root, ino).
 */
|
||||
struct inode_defrag {
|
||||
struct rb_node rb_node;
|
||||
/* inode number (objectid) of the inode to defrag */
|
||||
u64 ino;
|
||||
/*
 * Transid at which the defrag was queued; the defrag pass looks for
 * extents newer than this.
 */
|
||||
u64 transid;
|
||||
|
||||
/* objectid of the root the inode belongs to */
|
||||
u64 root;
|
||||
|
||||
/*
 * The extent size threshold for autodefrag.
 *
 * This value differs between compressed and non-compressed extents, so
 * it has to be passed in from the higher layer (see
 * inode_should_defrag()).
 */
|
||||
u32 extent_thresh;
|
||||
};
|
||||
|
||||
static int __compare_inode_defrag(struct inode_defrag *defrag1,
|
||||
struct inode_defrag *defrag2)
|
||||
{
|
||||
if (defrag1->root > defrag2->root)
|
||||
return 1;
|
||||
else if (defrag1->root < defrag2->root)
|
||||
return -1;
|
||||
else if (defrag1->ino > defrag2->ino)
|
||||
return 1;
|
||||
else if (defrag1->ino < defrag2->ino)
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* pop a record for an inode into the defrag tree. The lock
|
||||
* must be held already
|
||||
*
|
||||
* If you're inserting a record for an older transid than an
|
||||
* existing record, the transid already in the tree is lowered
|
||||
*
|
||||
* If an existing record is found the defrag item you
|
||||
* pass in is freed
|
||||
*/
|
||||
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
|
||||
struct inode_defrag *defrag)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct inode_defrag *entry;
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
int ret;
|
||||
|
||||
p = &fs_info->defrag_inodes.rb_node;
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
entry = rb_entry(parent, struct inode_defrag, rb_node);
|
||||
|
||||
ret = __compare_inode_defrag(defrag, entry);
|
||||
if (ret < 0)
|
||||
p = &parent->rb_left;
|
||||
else if (ret > 0)
|
||||
p = &parent->rb_right;
|
||||
else {
|
||||
/* if we're reinserting an entry for
|
||||
* an old defrag run, make sure to
|
||||
* lower the transid of our existing record
|
||||
*/
|
||||
if (defrag->transid < entry->transid)
|
||||
entry->transid = defrag->transid;
|
||||
entry->extent_thresh = min(defrag->extent_thresh,
|
||||
entry->extent_thresh);
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
|
||||
rb_link_node(&defrag->rb_node, parent, p);
|
||||
rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
|
||||
return 0;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* insert a defrag record for this inode if auto defrag is
|
||||
* enabled
|
||||
*/
|
||||
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, u32 extent_thresh)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct inode_defrag *defrag;
|
||||
u64 transid;
|
||||
int ret;
|
||||
|
||||
if (!__need_auto_defrag(fs_info))
|
||||
return 0;
|
||||
|
||||
if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
|
||||
return 0;
|
||||
|
||||
if (trans)
|
||||
transid = trans->transid;
|
||||
else
|
||||
transid = inode->root->last_trans;
|
||||
|
||||
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
|
||||
if (!defrag)
|
||||
return -ENOMEM;
|
||||
|
||||
defrag->ino = btrfs_ino(inode);
|
||||
defrag->transid = transid;
|
||||
defrag->root = root->root_key.objectid;
|
||||
defrag->extent_thresh = extent_thresh;
|
||||
|
||||
spin_lock(&fs_info->defrag_inodes_lock);
|
||||
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
|
||||
/*
|
||||
* If we set IN_DEFRAG flag and evict the inode from memory,
|
||||
* and then re-read this inode, this new inode doesn't have
|
||||
* IN_DEFRAG flag. At the case, we may find the existed defrag.
|
||||
*/
|
||||
ret = __btrfs_add_inode_defrag(inode, defrag);
|
||||
if (ret)
|
||||
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
|
||||
} else {
|
||||
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
|
||||
}
|
||||
spin_unlock(&fs_info->defrag_inodes_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* pick the defragable inode that we want, if it doesn't exist, we will get
|
||||
* the next one.
|
||||
*/
|
||||
static struct inode_defrag *
|
||||
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
|
||||
{
|
||||
struct inode_defrag *entry = NULL;
|
||||
struct inode_defrag tmp;
|
||||
struct rb_node *p;
|
||||
struct rb_node *parent = NULL;
|
||||
int ret;
|
||||
|
||||
tmp.ino = ino;
|
||||
tmp.root = root;
|
||||
|
||||
spin_lock(&fs_info->defrag_inodes_lock);
|
||||
p = fs_info->defrag_inodes.rb_node;
|
||||
while (p) {
|
||||
parent = p;
|
||||
entry = rb_entry(parent, struct inode_defrag, rb_node);
|
||||
|
||||
ret = __compare_inode_defrag(&tmp, entry);
|
||||
if (ret < 0)
|
||||
p = parent->rb_left;
|
||||
else if (ret > 0)
|
||||
p = parent->rb_right;
|
||||
else
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
|
||||
parent = rb_next(parent);
|
||||
if (parent)
|
||||
entry = rb_entry(parent, struct inode_defrag, rb_node);
|
||||
else
|
||||
entry = NULL;
|
||||
}
|
||||
out:
|
||||
if (entry)
|
||||
rb_erase(parent, &fs_info->defrag_inodes);
|
||||
spin_unlock(&fs_info->defrag_inodes_lock);
|
||||
return entry;
|
||||
}
|
||||
|
||||
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct inode_defrag *defrag;
|
||||
struct rb_node *node;
|
||||
|
||||
spin_lock(&fs_info->defrag_inodes_lock);
|
||||
node = rb_first(&fs_info->defrag_inodes);
|
||||
while (node) {
|
||||
rb_erase(node, &fs_info->defrag_inodes);
|
||||
defrag = rb_entry(node, struct inode_defrag, rb_node);
|
||||
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
|
||||
|
||||
cond_resched_lock(&fs_info->defrag_inodes_lock);
|
||||
|
||||
node = rb_first(&fs_info->defrag_inodes);
|
||||
}
|
||||
spin_unlock(&fs_info->defrag_inodes_lock);
|
||||
}
|
||||
|
||||
#define BTRFS_DEFRAG_BATCH 1024
|
||||
|
||||
static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
|
||||
struct inode_defrag *defrag)
|
||||
{
|
||||
struct btrfs_root *inode_root;
|
||||
struct inode *inode;
|
||||
struct btrfs_ioctl_defrag_range_args range;
|
||||
int ret = 0;
|
||||
u64 cur = 0;
|
||||
|
||||
again:
|
||||
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
|
||||
goto cleanup;
|
||||
if (!__need_auto_defrag(fs_info))
|
||||
goto cleanup;
|
||||
|
||||
/* get the inode */
|
||||
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
|
||||
if (IS_ERR(inode_root)) {
|
||||
ret = PTR_ERR(inode_root);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
|
||||
btrfs_put_root(inode_root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (cur >= i_size_read(inode)) {
|
||||
iput(inode);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* do a chunk of defrag */
|
||||
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
|
||||
memset(&range, 0, sizeof(range));
|
||||
range.len = (u64)-1;
|
||||
range.start = cur;
|
||||
range.extent_thresh = defrag->extent_thresh;
|
||||
|
||||
sb_start_write(fs_info->sb);
|
||||
ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
|
||||
BTRFS_DEFRAG_BATCH);
|
||||
sb_end_write(fs_info->sb);
|
||||
iput(inode);
|
||||
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
|
||||
cur = max(cur + fs_info->sectorsize, range.start);
|
||||
goto again;
|
||||
|
||||
cleanup:
|
||||
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* run through the list of inodes in the FS that need
|
||||
* defragging
|
||||
*/
|
||||
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct inode_defrag *defrag;
|
||||
u64 first_ino = 0;
|
||||
u64 root_objectid = 0;
|
||||
|
||||
atomic_inc(&fs_info->defrag_running);
|
||||
while (1) {
|
||||
/* Pause the auto defragger. */
|
||||
if (test_bit(BTRFS_FS_STATE_REMOUNTING,
|
||||
&fs_info->fs_state))
|
||||
break;
|
||||
|
||||
if (!__need_auto_defrag(fs_info))
|
||||
break;
|
||||
|
||||
/* find an inode to defrag */
|
||||
defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
|
||||
first_ino);
|
||||
if (!defrag) {
|
||||
if (root_objectid || first_ino) {
|
||||
root_objectid = 0;
|
||||
first_ino = 0;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
first_ino = defrag->ino + 1;
|
||||
root_objectid = defrag->root;
|
||||
|
||||
__btrfs_run_defrag_inode(fs_info, defrag);
|
||||
}
|
||||
atomic_dec(&fs_info->defrag_running);
|
||||
|
||||
/*
|
||||
* during unmount, we use the transaction_wait queue to
|
||||
* wait for the defragger to stop
|
||||
*/
|
||||
wake_up(&fs_info->transaction_wait);
|
||||
return 0;
|
||||
}
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "file-item.h"
|
||||
#include "ioctl.h"
|
||||
#include "file.h"
|
||||
#include "super.h"
|
||||
|
||||
/* simple helper to fault in pages and copy. This should go away
|
||||
* and be replaced with calls into generic code.
|
||||
|
@ -696,7 +380,10 @@ next_slot:
|
|||
args->start - extent_offset,
|
||||
0, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
key.offset = args->start;
|
||||
}
|
||||
|
@ -783,7 +470,10 @@ delete_extent_item:
|
|||
key.offset - extent_offset, 0,
|
||||
false);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
break;
|
||||
}
|
||||
args->bytes_found += extent_end - key.offset;
|
||||
}
|
||||
|
||||
|
@ -1302,7 +992,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
|
|||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
if (nowait) {
|
||||
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos)) {
|
||||
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
|
||||
cached_state)) {
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
unlock_page(pages[i]);
|
||||
put_page(pages[i]);
|
||||
|
@ -1372,6 +1063,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
|||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 lockstart, lockend;
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
|
@ -1388,12 +1080,14 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
|||
num_bytes = lockend - lockstart + 1;
|
||||
|
||||
if (nowait) {
|
||||
if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend)) {
|
||||
if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend,
|
||||
&cached_state)) {
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
} else {
|
||||
btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL);
|
||||
btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend,
|
||||
&cached_state);
|
||||
}
|
||||
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
||||
NULL, NULL, NULL, nowait, false);
|
||||
|
@ -1402,7 +1096,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
|||
else
|
||||
*write_bytes = min_t(size_t, *write_bytes ,
|
||||
num_bytes - pos + lockstart);
|
||||
unlock_extent(&inode->io_tree, lockstart, lockend, NULL);
|
||||
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1505,7 +1199,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
|
|||
if (nowait)
|
||||
ilock_flags |= BTRFS_ILOCK_TRY;
|
||||
|
||||
ret = btrfs_inode_lock(inode, ilock_flags);
|
||||
ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
@ -1740,7 +1434,7 @@ again:
|
|||
iocb->ki_pos += num_written;
|
||||
}
|
||||
out:
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
return num_written ? num_written : ret;
|
||||
}
|
||||
|
||||
|
@ -1780,19 +1474,19 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
ilock_flags |= BTRFS_ILOCK_SHARED;
|
||||
|
||||
relock:
|
||||
err = btrfs_inode_lock(inode, ilock_flags);
|
||||
err = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = generic_write_checks(iocb, from);
|
||||
if (err <= 0) {
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = btrfs_write_check(iocb, from, err);
|
||||
if (err < 0) {
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -1803,13 +1497,13 @@ relock:
|
|||
*/
|
||||
if ((ilock_flags & BTRFS_ILOCK_SHARED) &&
|
||||
pos + iov_iter_count(from) > i_size_read(inode)) {
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
ilock_flags &= ~BTRFS_ILOCK_SHARED;
|
||||
goto relock;
|
||||
}
|
||||
|
||||
if (check_direct_IO(fs_info, from, pos)) {
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
goto buffered;
|
||||
}
|
||||
|
||||
|
@ -1840,7 +1534,7 @@ relock:
|
|||
* iocb, and that needs to lock the inode. So unlock it before calling
|
||||
* iomap_dio_complete() to avoid a deadlock.
|
||||
*/
|
||||
btrfs_inode_unlock(inode, ilock_flags);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
|
||||
if (IS_ERR_OR_NULL(dio))
|
||||
err = PTR_ERR_OR_ZERO(dio);
|
||||
|
@ -1887,8 +1581,8 @@ buffered:
|
|||
/*
|
||||
* If we are in a NOWAIT context, then return -EAGAIN to signal the caller
|
||||
* it must retry the operation in a context where blocking is acceptable,
|
||||
* since we currently don't have NOWAIT semantics support for buffered IO
|
||||
* and may block there for many reasons (reserving space for example).
|
||||
* because even if we end up not blocking during the buffered IO attempt
|
||||
* below, we will block when flushing and waiting for the IO.
|
||||
*/
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
err = -EAGAIN;
|
||||
|
@ -1928,7 +1622,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
|||
loff_t count;
|
||||
ssize_t ret;
|
||||
|
||||
btrfs_inode_lock(inode, 0);
|
||||
btrfs_inode_lock(BTRFS_I(inode), 0);
|
||||
count = encoded->len;
|
||||
ret = generic_write_checks_count(iocb, &count);
|
||||
if (ret == 0 && count != encoded->len) {
|
||||
|
@ -1947,7 +1641,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
|||
|
||||
ret = btrfs_do_encoded_write(iocb, from, encoded);
|
||||
out:
|
||||
btrfs_inode_unlock(inode, 0);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2008,10 +1702,12 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
|
|||
{
|
||||
struct btrfs_file_private *private = filp->private_data;
|
||||
|
||||
if (private && private->filldir_buf)
|
||||
if (private) {
|
||||
kfree(private->filldir_buf);
|
||||
free_extent_state(private->llseek_cached_state);
|
||||
kfree(private);
|
||||
filp->private_data = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set by setattr when we are about to truncate a file from a non-zero
|
||||
|
@ -2118,7 +1814,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
|
||||
atomic_inc(&root->log_batch);
|
||||
|
||||
|
@ -2142,7 +1838,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
|||
*/
|
||||
ret = start_ordered_ops(inode, start, end);
|
||||
if (ret) {
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -2245,7 +1941,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
|||
* file again, but that will end up using the synchronization
|
||||
* inside btrfs_sync_log to keep things safe.
|
||||
*/
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
|
||||
if (ret == BTRFS_NO_LOG_SYNC) {
|
||||
ret = btrfs_end_transaction(trans);
|
||||
|
@ -2313,7 +2009,7 @@ out:
|
|||
|
||||
out_release_extents:
|
||||
btrfs_release_log_ctx_extents(&ctx);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -2908,7 +2604,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
|||
bool truncated_block = false;
|
||||
bool updated_inode = false;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, offset, len);
|
||||
if (ret)
|
||||
|
@ -2956,7 +2652,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
|||
truncated_block = true;
|
||||
ret = btrfs_truncate_block(BTRFS_I(inode), offset, 0, 0);
|
||||
if (ret) {
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@ -3055,7 +2751,7 @@ out_only_mutex:
|
|||
ret = ret2;
|
||||
}
|
||||
}
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3366,7 +3062,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
if (mode & FALLOC_FL_PUNCH_HOLE)
|
||||
return btrfs_punch_hole(file, offset, len);
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
|
||||
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
|
||||
ret = inode_newsize_ok(inode, offset + len);
|
||||
|
@ -3416,7 +3112,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
|
||||
if (mode & FALLOC_FL_ZERO_RANGE) {
|
||||
ret = btrfs_zero_range(inode, offset, len, mode);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3514,7 +3210,7 @@ out_unlock:
|
|||
unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
|
||||
&cached_state);
|
||||
out:
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
extent_changeset_free(data_reserved);
|
||||
return ret;
|
||||
}
|
||||
|
@ -3526,117 +3222,106 @@ out:
|
|||
* looping while it gets adjacent subranges, and merging them together.
|
||||
*/
|
||||
static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state,
|
||||
bool *search_io_tree,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret)
|
||||
{
|
||||
const u64 len = end + 1 - start;
|
||||
struct extent_map_tree *em_tree = &inode->extent_tree;
|
||||
struct extent_map *em;
|
||||
u64 em_end;
|
||||
u64 delalloc_len;
|
||||
u64 len = end + 1 - start;
|
||||
u64 delalloc_len = 0;
|
||||
struct btrfs_ordered_extent *oe;
|
||||
u64 oe_start;
|
||||
u64 oe_end;
|
||||
|
||||
/*
|
||||
* Search the io tree first for EXTENT_DELALLOC. If we find any, it
|
||||
* means we have delalloc (dirty pages) for which writeback has not
|
||||
* started yet.
|
||||
*/
|
||||
if (*search_io_tree) {
|
||||
spin_lock(&inode->lock);
|
||||
if (inode->delalloc_bytes > 0) {
|
||||
spin_unlock(&inode->lock);
|
||||
*delalloc_start_ret = start;
|
||||
delalloc_len = count_range_bits(&inode->io_tree, delalloc_start_ret, end,
|
||||
len, EXTENT_DELALLOC, 1);
|
||||
delalloc_len = count_range_bits(&inode->io_tree,
|
||||
delalloc_start_ret, end,
|
||||
len, EXTENT_DELALLOC, 1,
|
||||
cached_state);
|
||||
} else {
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
}
|
||||
|
||||
if (delalloc_len > 0) {
|
||||
/*
|
||||
* If delalloc was found then *delalloc_start_ret has a sector size
|
||||
* aligned value (rounded down).
|
||||
*/
|
||||
if (delalloc_len > 0)
|
||||
*delalloc_end_ret = *delalloc_start_ret + delalloc_len - 1;
|
||||
|
||||
if (*delalloc_start_ret == start) {
|
||||
/* Delalloc for the whole range, nothing more to do. */
|
||||
if (*delalloc_end_ret == end)
|
||||
return true;
|
||||
/* Else trim our search range for ordered extents. */
|
||||
start = *delalloc_end_ret + 1;
|
||||
len = end + 1 - start;
|
||||
}
|
||||
} else {
|
||||
/* No delalloc, future calls don't need to search again. */
|
||||
*search_io_tree = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now also check if there's any extent map in the range that does not
|
||||
* map to a hole or prealloc extent. We do this because:
|
||||
* Now also check if there's any ordered extent in the range.
|
||||
* We do this because:
|
||||
*
|
||||
* 1) When delalloc is flushed, the file range is locked, we clear the
|
||||
* EXTENT_DELALLOC bit from the io tree and create an extent map for
|
||||
* an allocated extent. So we might just have been called after
|
||||
* delalloc is flushed and before the ordered extent completes and
|
||||
* inserts the new file extent item in the subvolume's btree;
|
||||
* EXTENT_DELALLOC bit from the io tree and create an extent map and
|
||||
* an ordered extent for the write. So we might just have been called
|
||||
* after delalloc is flushed and before the ordered extent completes
|
||||
* and inserts the new file extent item in the subvolume's btree;
|
||||
*
|
||||
* 2) We may have an extent map created by flushing delalloc for a
|
||||
* 2) We may have an ordered extent created by flushing delalloc for a
|
||||
* subrange that starts before the subrange we found marked with
|
||||
* EXTENT_DELALLOC in the io tree.
|
||||
*
|
||||
* We could also use the extent map tree to find such delalloc that is
|
||||
* being flushed, but using the ordered extents tree is more efficient
|
||||
* because it's usually much smaller as ordered extents are removed from
|
||||
* the tree once they complete. With the extent maps, we may have them
|
||||
* in the extent map tree for a very long time, and they were either
|
||||
* created by previous writes or loaded by read operations.
|
||||
*/
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, start, len);
|
||||
read_unlock(&em_tree->lock);
|
||||
|
||||
/* extent_map_end() returns a non-inclusive end offset. */
|
||||
em_end = em ? extent_map_end(em) : 0;
|
||||
|
||||
/*
|
||||
* If we have a hole/prealloc extent map, check the next one if this one
|
||||
* ends before our range's end.
|
||||
*/
|
||||
if (em && (em->block_start == EXTENT_MAP_HOLE ||
|
||||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) && em_end < end) {
|
||||
struct extent_map *next_em;
|
||||
|
||||
read_lock(&em_tree->lock);
|
||||
next_em = lookup_extent_mapping(em_tree, em_end, len - em_end);
|
||||
read_unlock(&em_tree->lock);
|
||||
|
||||
free_extent_map(em);
|
||||
em_end = next_em ? extent_map_end(next_em) : 0;
|
||||
em = next_em;
|
||||
}
|
||||
|
||||
if (em && (em->block_start == EXTENT_MAP_HOLE ||
|
||||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* No extent map or one for a hole or prealloc extent. Use the delalloc
|
||||
* range we found in the io tree if we have one.
|
||||
*/
|
||||
if (!em)
|
||||
oe = btrfs_lookup_first_ordered_range(inode, start, len);
|
||||
if (!oe)
|
||||
return (delalloc_len > 0);
|
||||
|
||||
/*
|
||||
* We don't have any range as EXTENT_DELALLOC in the io tree, so the
|
||||
* extent map is the only subrange representing delalloc.
|
||||
*/
|
||||
/* The ordered extent may span beyond our search range. */
|
||||
oe_start = max(oe->file_offset, start);
|
||||
oe_end = min(oe->file_offset + oe->num_bytes - 1, end);
|
||||
|
||||
btrfs_put_ordered_extent(oe);
|
||||
|
||||
/* Don't have unflushed delalloc, return the ordered extent range. */
|
||||
if (delalloc_len == 0) {
|
||||
*delalloc_start_ret = em->start;
|
||||
*delalloc_end_ret = min(end, em_end - 1);
|
||||
free_extent_map(em);
|
||||
*delalloc_start_ret = oe_start;
|
||||
*delalloc_end_ret = oe_end;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The extent map represents a delalloc range that starts before the
|
||||
* delalloc range we found in the io tree.
|
||||
* We have both unflushed delalloc (io_tree) and an ordered extent.
|
||||
* If the ranges are adjacent, return a combined range, otherwise
|
||||
* return the leftmost range.
|
||||
*/
|
||||
if (em->start < *delalloc_start_ret) {
|
||||
*delalloc_start_ret = em->start;
|
||||
/*
|
||||
* If the ranges are adjacent, return a combined range.
|
||||
* Otherwise return the extent map's range.
|
||||
*/
|
||||
if (em_end < *delalloc_start_ret)
|
||||
*delalloc_end_ret = min(end, em_end - 1);
|
||||
|
||||
free_extent_map(em);
|
||||
return true;
|
||||
if (oe_start < *delalloc_start_ret) {
|
||||
if (oe_end < *delalloc_start_ret)
|
||||
*delalloc_end_ret = oe_end;
|
||||
*delalloc_start_ret = oe_start;
|
||||
} else if (*delalloc_end_ret + 1 == oe_start) {
|
||||
*delalloc_end_ret = oe_end;
|
||||
}
|
||||
|
||||
/*
|
||||
* The extent map starts after the delalloc range we found in the io
|
||||
* tree. If it's adjacent, return a combined range, otherwise return
|
||||
* the range found in the io tree.
|
||||
*/
|
||||
if (*delalloc_end_ret + 1 == em->start)
|
||||
*delalloc_end_ret = min(end, em_end - 1);
|
||||
|
||||
free_extent_map(em);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -3648,6 +3333,8 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end
|
|||
* sector size aligned.
|
||||
* @end: The end offset (inclusive value) of the search range.
|
||||
* It does not need to be sector size aligned.
|
||||
* @cached_state: Extent state record used for speeding up delalloc
|
||||
* searches in the inode's io_tree. Can be NULL.
|
||||
* @delalloc_start_ret: Output argument, set to the start offset of the
|
||||
* subrange found with delalloc (may not be sector size
|
||||
* aligned).
|
||||
|
@ -3659,10 +3346,12 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end
|
|||
* end offsets of the subrange.
|
||||
*/
|
||||
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret)
|
||||
{
|
||||
u64 cur_offset = round_down(start, inode->root->fs_info->sectorsize);
|
||||
u64 prev_delalloc_end = 0;
|
||||
bool search_io_tree = true;
|
||||
bool ret = false;
|
||||
|
||||
while (cur_offset < end) {
|
||||
|
@ -3671,6 +3360,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
|||
bool delalloc;
|
||||
|
||||
delalloc = find_delalloc_subrange(inode, cur_offset, end,
|
||||
cached_state, &search_io_tree,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
if (!delalloc)
|
||||
|
@ -3716,13 +3406,14 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
|||
* is found, it updates @start_ret with the start of the subrange.
|
||||
*/
|
||||
static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
|
||||
struct extent_state **cached_state,
|
||||
u64 start, u64 end, u64 *start_ret)
|
||||
{
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
bool delalloc;
|
||||
|
||||
delalloc = btrfs_find_delalloc_in_range(inode, start, end,
|
||||
delalloc = btrfs_find_delalloc_in_range(inode, start, end, cached_state,
|
||||
&delalloc_start, &delalloc_end);
|
||||
if (delalloc && whence == SEEK_DATA) {
|
||||
*start_ret = delalloc_start;
|
||||
|
@ -3765,11 +3456,13 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
|
|||
return false;
|
||||
}
|
||||
|
||||
static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset,
|
||||
int whence)
|
||||
static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host);
|
||||
struct btrfs_file_private *private = file->private_data;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct extent_state **delalloc_cached_state;
|
||||
const loff_t i_size = i_size_read(&inode->vfs_inode);
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
struct btrfs_root *root = inode->root;
|
||||
|
@ -3794,6 +3487,22 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset,
|
|||
inode_get_bytes(&inode->vfs_inode) == i_size)
|
||||
return i_size;
|
||||
|
||||
if (!private) {
|
||||
private = kzalloc(sizeof(*private), GFP_KERNEL);
|
||||
/*
|
||||
* No worries if memory allocation failed.
|
||||
* The private structure is used only for speeding up multiple
|
||||
* lseek SEEK_HOLE/DATA calls to a file when there's delalloc,
|
||||
* so everything will still be correct.
|
||||
*/
|
||||
file->private_data = private;
|
||||
}
|
||||
|
||||
if (private)
|
||||
delalloc_cached_state = &private->llseek_cached_state;
|
||||
else
|
||||
delalloc_cached_state = NULL;
|
||||
|
||||
/*
|
||||
* offset can be negative, in this case we start finding DATA/HOLE from
|
||||
* the very start of the file.
|
||||
|
@ -3871,6 +3580,7 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset,
|
|||
search_start = offset;
|
||||
|
||||
found = find_desired_extent_in_hole(inode, whence,
|
||||
delalloc_cached_state,
|
||||
search_start,
|
||||
key.offset - 1,
|
||||
&found_start);
|
||||
|
@ -3905,6 +3615,7 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset,
|
|||
search_start = offset;
|
||||
|
||||
found = find_desired_extent_in_hole(inode, whence,
|
||||
delalloc_cached_state,
|
||||
search_start,
|
||||
extent_end - 1,
|
||||
&found_start);
|
||||
|
@ -3946,7 +3657,8 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset,
|
|||
|
||||
/* We have an implicit hole from the last extent found up to i_size. */
|
||||
if (!found && start < i_size) {
|
||||
found = find_desired_extent_in_hole(inode, whence, start,
|
||||
found = find_desired_extent_in_hole(inode, whence,
|
||||
delalloc_cached_state, start,
|
||||
i_size - 1, &start);
|
||||
if (!found)
|
||||
start = i_size;
|
||||
|
@ -3974,9 +3686,9 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
|
|||
return generic_file_llseek(file, offset, whence);
|
||||
case SEEK_DATA:
|
||||
case SEEK_HOLE:
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
|
||||
offset = find_desired_extent(BTRFS_I(inode), offset, whence);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
offset = find_desired_extent(file, offset, whence);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -4031,7 +3743,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
|
|||
if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
|
||||
return 0;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
again:
|
||||
/*
|
||||
* This is similar to what we do for direct IO writes, see the comment
|
||||
|
@ -4080,7 +3792,7 @@ again:
|
|||
goto again;
|
||||
}
|
||||
}
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
return ret < 0 ? ret : read;
|
||||
}
|
||||
|
||||
|
@ -4117,23 +3829,6 @@ const struct file_operations btrfs_file_operations = {
|
|||
.remap_file_range = btrfs_remap_file_range,
|
||||
};
|
||||
|
||||
/* Tear down the slab cache that backs struct inode_defrag records. */
void __cold btrfs_auto_defrag_exit(void)
{
	kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
|
||||
|
||||
/*
 * Create the slab cache used for struct inode_defrag records.
 *
 * Return: 0 on success, -ENOMEM if the cache could not be created.
 */
int __init btrfs_auto_defrag_init(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("btrfs_inode_defrag",
				  sizeof(struct inode_defrag), 0,
				  SLAB_MEM_SPREAD,
				  NULL);
	btrfs_inode_defrag_cachep = cache;

	return cache ? 0 : -ENOMEM;
}
|
||||
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
|
||||
{
|
||||
int ret;
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_FILE_H
|
||||
#define BTRFS_FILE_H
|
||||
|
||||
extern const struct file_operations btrfs_file_operations;
|
||||
|
||||
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
|
||||
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode,
|
||||
struct btrfs_drop_extents_args *args);
|
||||
int btrfs_replace_file_extents(struct btrfs_inode *inode,
|
||||
struct btrfs_path *path, const u64 start,
|
||||
const u64 end,
|
||||
struct btrfs_replace_extent_info *extent_info,
|
||||
struct btrfs_trans_handle **trans_out);
|
||||
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, u64 start, u64 end);
|
||||
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
int btrfs_release_file(struct inode *inode, struct file *file);
|
||||
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
size_t num_pages, loff_t pos, size_t write_bytes,
|
||||
struct extent_state **cached, bool noreserve);
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
|
||||
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||
size_t *write_bytes, bool nowait);
|
||||
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
|
||||
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret);
|
||||
|
||||
#endif
|
|
@ -11,8 +11,10 @@
|
|||
#include <linux/ratelimit.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "misc.h"
|
||||
#include "free-space-cache.h"
|
||||
#include "transaction.h"
|
||||
#include "disk-io.h"
|
||||
|
@ -24,11 +26,18 @@
|
|||
#include "discard.h"
|
||||
#include "subpage.h"
|
||||
#include "inode-item.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
#include "file.h"
|
||||
#include "super.h"
|
||||
|
||||
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
|
||||
#define MAX_CACHE_BYTES_PER_GIG SZ_64K
|
||||
#define FORCE_EXTENT_THRESHOLD SZ_1M
|
||||
|
||||
static struct kmem_cache *btrfs_free_space_cachep;
|
||||
static struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
|
||||
struct btrfs_trim_range {
|
||||
u64 start;
|
||||
u64 bytes;
|
||||
|
@ -251,7 +260,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
|
||||
if (ret) {
|
||||
btrfs_add_delayed_iput(inode);
|
||||
btrfs_add_delayed_iput(BTRFS_I(inode));
|
||||
goto out;
|
||||
}
|
||||
clear_nlink(inode);
|
||||
|
@ -265,7 +274,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
|
|||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
/* One for the lookup ref */
|
||||
btrfs_add_delayed_iput(inode);
|
||||
btrfs_add_delayed_iput(BTRFS_I(inode));
|
||||
|
||||
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
|
||||
key.type = 0;
|
||||
|
@ -1363,8 +1372,8 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
|
|||
path, block_group->start);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out cached info to an inode
|
||||
/*
|
||||
* Write out cached info to an inode.
|
||||
*
|
||||
* @root: root the inode belongs to
|
||||
* @inode: freespace inode we are writing out
|
||||
|
@ -2717,8 +2726,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
|
|||
btrfs_mark_bg_unused(block_group);
|
||||
} else if (bg_reclaim_threshold &&
|
||||
reclaimable_unusable >=
|
||||
div_factor_fine(block_group->zone_capacity,
|
||||
bg_reclaim_threshold)) {
|
||||
mult_perc(block_group->zone_capacity, bg_reclaim_threshold)) {
|
||||
btrfs_mark_bg_to_reclaim(block_group);
|
||||
}
|
||||
|
||||
|
@ -3028,10 +3036,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_is_free_space_trimmed - see if everything is trimmed
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
/*
|
||||
* Walk @block_group's free space rb_tree to determine if everything is trimmed.
|
||||
*/
|
||||
bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
|
||||
|
@ -4132,6 +4137,31 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Create the two slab caches used by the free space cache code: one for
 * struct btrfs_free_space entries and one for page-sized, page-aligned
 * bitmaps.
 *
 * Return: 0 on success, -ENOMEM if either cache could not be created.  If the
 * bitmap cache fails, the entry cache created just before it is destroyed
 * again.
 */
int __init btrfs_free_space_init(void)
{
	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		goto fail;

	btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
			PAGE_SIZE, PAGE_SIZE,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_bitmap_cachep)
		goto destroy_entry_cache;

	return 0;

destroy_entry_cache:
	kmem_cache_destroy(btrfs_free_space_cachep);
fail:
	return -ENOMEM;
}
|
||||
|
||||
/* Destroy the free space entry cache and the free space bitmap cache. */
void __cold btrfs_free_space_exit(void)
{
	kmem_cache_destroy(btrfs_free_space_cachep);
	kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
/*
|
||||
* Use this if you need to make a bitmap or extent entry specifically, it
|
||||
|
|
|
@ -43,6 +43,17 @@ static inline bool btrfs_free_space_trimming_bitmap(
|
|||
return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
|
||||
}
|
||||
|
||||
/*
 * Deltas are an effective way to populate global statistics.  Name the
 * entries to make it clear what we're doing.  An example is discard_extents
 * in btrfs_free_space_ctl.
 *
 * NOTE(review): presumably these are used as array indices, with
 * BTRFS_STAT_NR_ENTRIES as the array size - confirm against the users.
 */
enum {
	BTRFS_STAT_CURR = 0,
	BTRFS_STAT_PREV,
	/* Count of the entries above; must stay last. */
	BTRFS_STAT_NR_ENTRIES,
};
|
||||
|
||||
struct btrfs_free_space_ctl {
|
||||
spinlock_t tree_lock;
|
||||
struct rb_root free_space_offset;
|
||||
|
@ -79,6 +90,8 @@ struct btrfs_io_ctl {
|
|||
int bitmaps;
|
||||
};
|
||||
|
||||
int __init btrfs_free_space_init(void);
|
||||
void __cold btrfs_free_space_exit(void);
|
||||
struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
||||
struct btrfs_path *path);
|
||||
int create_free_space_inode(struct btrfs_trans_handle *trans,
|
||||
|
|
|
@ -5,12 +5,17 @@
|
|||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "locking.h"
|
||||
#include "free-space-tree.h"
|
||||
#include "transaction.h"
|
||||
#include "block-group.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
|
||||
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group *block_group,
|
||||
|
@ -803,7 +808,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
|
|||
u32 flags;
|
||||
int ret;
|
||||
|
||||
if (block_group->needs_free_space) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
|
||||
ret = __add_block_group_free_space(trans, block_group, path);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -996,7 +1001,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
|
|||
u32 flags;
|
||||
int ret;
|
||||
|
||||
if (block_group->needs_free_space) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
|
||||
ret = __add_block_group_free_space(trans, block_group, path);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -1299,7 +1304,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|||
{
|
||||
int ret;
|
||||
|
||||
block_group->needs_free_space = 0;
|
||||
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
|
||||
|
||||
ret = add_new_free_space_info(trans, block_group, path);
|
||||
if (ret)
|
||||
|
@ -1321,7 +1326,7 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
|
||||
mutex_lock(&block_group->free_space_lock);
|
||||
if (!block_group->needs_free_space)
|
||||
if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
|
||||
goto out;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
@ -1354,7 +1359,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
|
|||
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
|
||||
return 0;
|
||||
|
||||
if (block_group->needs_free_space) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
|
||||
/* We never added this block group to the free space tree. */
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
|
||||
/*
 * Set an incompat feature flag in the in-memory super block copy.
 *
 * @fs_info: filesystem whose super_copy is updated
 * @flag:    incompat feature bit(s) to set
 * @name:    human readable feature name, used only in the log message
 *
 * The flags are first checked without taking super_lock; only when the flag
 * is not set yet is the lock taken, the flags re-read and, if still needed,
 * updated.  A message is logged only when the flags were actually changed.
 */
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			     const char *name)
{
	struct btrfs_super_block *sb = fs_info->super_copy;
	u64 cur;

	/* Unlocked check: nothing to do when the flag is already present. */
	if (btrfs_super_incompat_flags(sb) & flag)
		return;

	spin_lock(&fs_info->super_lock);
	/* Re-read under the lock, the flags may have changed meanwhile. */
	cur = btrfs_super_incompat_flags(sb);
	if (!(cur & flag)) {
		btrfs_set_super_incompat_flags(sb, cur | flag);
		btrfs_info(fs_info,
			   "setting incompat feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
}
|
||||
|
||||
/*
 * Clear an incompat feature flag in the in-memory super block copy.
 *
 * @fs_info: filesystem whose super_copy is updated
 * @flag:    incompat feature bit(s) to clear
 * @name:    human readable feature name, used only in the log message
 *
 * The flags are first checked without taking super_lock; only when some bit
 * of @flag is set is the lock taken, the flags re-read and, if still needed,
 * updated.  A message is logged only when the flags were actually changed.
 */
void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			       const char *name)
{
	struct btrfs_super_block *sb = fs_info->super_copy;
	u64 cur;

	/* Unlocked check: nothing to do when the flag is not set. */
	if (!(btrfs_super_incompat_flags(sb) & flag))
		return;

	spin_lock(&fs_info->super_lock);
	/* Re-read under the lock, the flags may have changed meanwhile. */
	cur = btrfs_super_incompat_flags(sb);
	if (cur & flag) {
		btrfs_set_super_incompat_flags(sb, cur & ~flag);
		btrfs_info(fs_info,
			   "clearing incompat feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
}
|
||||
|
||||
/*
 * Set a compat-ro feature flag in the in-memory super block copy.
 *
 * @fs_info: filesystem whose super_copy is updated
 * @flag:    compat-ro feature bit(s) to set
 * @name:    human readable feature name, used only in the log message
 *
 * The flags are first checked without taking super_lock; only when the flag
 * is not set yet is the lock taken, the flags re-read and, if still needed,
 * updated.  A message is logged only when the flags were actually changed.
 */
void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
			      const char *name)
{
	struct btrfs_super_block *sb = fs_info->super_copy;
	u64 cur;

	/* Unlocked check: nothing to do when the flag is already present. */
	if (btrfs_super_compat_ro_flags(sb) & flag)
		return;

	spin_lock(&fs_info->super_lock);
	/* Re-read under the lock, the flags may have changed meanwhile. */
	cur = btrfs_super_compat_ro_flags(sb);
	if (!(cur & flag)) {
		btrfs_set_super_compat_ro_flags(sb, cur | flag);
		btrfs_info(fs_info,
			   "setting compat-ro feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
}
|
||||
|
||||
/*
 * Clear a compat-ro feature flag in the in-memory super block copy.
 *
 * @fs_info: filesystem whose super_copy is updated
 * @flag:    compat-ro feature bit(s) to clear
 * @name:    human readable feature name, used only in the log message
 *
 * The flags are first checked without taking super_lock; only when some bit
 * of @flag is set is the lock taken, the flags re-read and, if still needed,
 * updated.  A message is logged only when the flags were actually changed.
 */
void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
				const char *name)
{
	struct btrfs_super_block *sb = fs_info->super_copy;
	u64 cur;

	/* Unlocked check: nothing to do when the flag is not set. */
	if (!(btrfs_super_compat_ro_flags(sb) & flag))
		return;

	spin_lock(&fs_info->super_lock);
	/* Re-read under the lock, the flags may have changed meanwhile. */
	cur = btrfs_super_compat_ro_flags(sb);
	if (cur & flag) {
		btrfs_set_super_compat_ro_flags(sb, cur & ~flag);
		btrfs_info(fs_info,
			   "clearing compat-ro feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
}
|
|
@ -0,0 +1,976 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_FS_H
|
||||
#define BTRFS_FS_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/btrfs_tree.h>
|
||||
#include <linux/sizes.h>
|
||||
#include "extent-io-tree.h"
|
||||
#include "extent_map.h"
|
||||
#include "async-thread.h"
|
||||
#include "block-rsv.h"
|
||||
|
||||
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
|
||||
|
||||
#define BTRFS_OLDEST_GENERATION 0ULL
|
||||
|
||||
#define BTRFS_EMPTY_DIR_SIZE 0
|
||||
|
||||
#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
|
||||
|
||||
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
|
||||
#define BTRFS_SUPER_INFO_SIZE 4096
|
||||
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
|
||||
|
||||
/*
|
||||
* The reserved space at the beginning of each device. It covers the primary
|
||||
* super block and leaves space for potential use by other tools like
|
||||
* bootloaders or to lower potential damage of accidental overwrite.
|
||||
*/
|
||||
#define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M)
|
||||
/*
 * Runtime (in-memory) states of a filesystem.
 */
enum {
	/* Global indicator that a serious filesystem error happened */
	BTRFS_FS_STATE_ERROR = 0,
	/*
	 * A remount is in progress; this allows some operations, like defrag,
	 * to be skipped
	 */
	BTRFS_FS_STATE_REMOUNTING,
	/* The filesystem is in read-only mode */
	BTRFS_FS_STATE_RO,
	/* A transaction abort has already been reported on this filesystem */
	BTRFS_FS_STATE_TRANS_ABORTED,
	/*
	 * Bio operations should be blocked on this filesystem because a source
	 * or target device is being destroyed as part of a device replace
	 */
	BTRFS_FS_STATE_DEV_REPLACING,
	/* This btrfs_fs_info was created for self-tests */
	BTRFS_FS_STATE_DUMMY_FS_INFO,

	/*
	 * NOTE(review): undocumented in the original; by its name this seems
	 * to mark a filesystem operating without checksums - confirm against
	 * the users of this state.
	 */
	BTRFS_FS_STATE_NO_CSUMS,

	/* There was an error cleaning up a log tree */
	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,

	/* Number of states defined above, keep as the last enumerator */
	BTRFS_FS_STATE_COUNT
};
|
||||
|
||||
enum {
	BTRFS_FS_CLOSING_START = 0,
	BTRFS_FS_CLOSING_DONE,
	BTRFS_FS_LOG_RECOVERING,
	BTRFS_FS_OPEN,
	BTRFS_FS_QUOTA_ENABLED,
	BTRFS_FS_UPDATE_UUID_TREE_GEN,
	BTRFS_FS_CREATING_FREE_SPACE_TREE,
	BTRFS_FS_BTREE_ERR,
	BTRFS_FS_LOG1_ERR,
	BTRFS_FS_LOG2_ERR,
	BTRFS_FS_QUOTA_OVERRIDE,
	/* Internal record of whether the fs has been frozen */
	BTRFS_FS_FROZEN,
	/*
	 * Balance has been set up from the ioctl and is in its main phase;
	 * fs_info::balance_ctl is initialized.
	 */
	BTRFS_FS_BALANCE_RUNNING,

	/*
	 * Relocation of a chunk has started.  Set per chunk and toggled
	 * between chunks.
	 */
	BTRFS_FS_RELOC_RUNNING,

	/* The cleaner thread is awake and doing something */
	BTRFS_FS_CLEANER_RUNNING,

	/*
	 * The checksum implementation is an optimized one and considered
	 * fast, so checksums need not be offloaded to workqueues.
	 */
	BTRFS_FS_CSUM_IMPL_FAST,

	/* The discard workqueue can service discards */
	BTRFS_FS_DISCARD_RUNNING,

	/* Space cache v1 needs to be cleaned up */
	BTRFS_FS_CLEANUP_SPACE_CACHE_V1,

	/* The free space tree cannot be trusted for caching yet */
	BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,

	/* Whether there are any tree modification log users */
	BTRFS_FS_TREE_MOD_LOG_USERS,

	/* The transaction kthread should commit right now */
	BTRFS_FS_COMMIT_TRANS,

	/* There are half completed snapshot deletions pending */
	BTRFS_FS_UNFINISHED_DROPS,

	/* A zone has to be finished before the next allocation */
	BTRFS_FS_NEED_ZONE_FINISH,

	/* A transaction commit is wanted */
	BTRFS_FS_NEED_TRANS_COMMIT,

#if BITS_PER_LONG == 32
	/* Whether the error/warn message was printed on 32bit systems */
	BTRFS_FS_32BIT_ERROR,
	BTRFS_FS_32BIT_WARN,
#endif
};
|
||||
|
||||
/*
 * Mount option flags, one bit per option.
 *
 * Note: when adding a new option, remember to add it to
 * btrfs_show_options() as well.
 */
enum {
	BTRFS_MOUNT_NODATASUM			= 1UL << 0,
	BTRFS_MOUNT_NODATACOW			= 1UL << 1,
	BTRFS_MOUNT_NOBARRIER			= 1UL << 2,
	BTRFS_MOUNT_SSD				= 1UL << 3,
	BTRFS_MOUNT_DEGRADED			= 1UL << 4,
	BTRFS_MOUNT_COMPRESS			= 1UL << 5,
	BTRFS_MOUNT_NOTREELOG			= 1UL << 6,
	BTRFS_MOUNT_FLUSHONCOMMIT		= 1UL << 7,
	BTRFS_MOUNT_SSD_SPREAD			= 1UL << 8,
	BTRFS_MOUNT_NOSSD			= 1UL << 9,
	BTRFS_MOUNT_DISCARD_SYNC		= 1UL << 10,
	BTRFS_MOUNT_FORCE_COMPRESS		= 1UL << 11,
	BTRFS_MOUNT_SPACE_CACHE			= 1UL << 12,
	BTRFS_MOUNT_CLEAR_CACHE			= 1UL << 13,
	BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	= 1UL << 14,
	BTRFS_MOUNT_ENOSPC_DEBUG		= 1UL << 15,
	BTRFS_MOUNT_AUTO_DEFRAG			= 1UL << 16,
	BTRFS_MOUNT_USEBACKUPROOT		= 1UL << 17,
	BTRFS_MOUNT_SKIP_BALANCE		= 1UL << 18,
	BTRFS_MOUNT_CHECK_INTEGRITY		= 1UL << 19,
	BTRFS_MOUNT_CHECK_INTEGRITY_DATA	= 1UL << 20,
	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= 1UL << 21,
	BTRFS_MOUNT_RESCAN_UUID_TREE		= 1UL << 22,
	BTRFS_MOUNT_FRAGMENT_DATA		= 1UL << 23,
	BTRFS_MOUNT_FRAGMENT_METADATA		= 1UL << 24,
	BTRFS_MOUNT_FREE_SPACE_TREE		= 1UL << 25,
	BTRFS_MOUNT_NOLOGREPLAY			= 1UL << 26,
	BTRFS_MOUNT_REF_VERIFY			= 1UL << 27,
	BTRFS_MOUNT_DISCARD_ASYNC		= 1UL << 28,
	BTRFS_MOUNT_IGNOREBADROOTS		= 1UL << 29,
	BTRFS_MOUNT_IGNOREDATACSUMS		= 1UL << 30,
	BTRFS_MOUNT_NODISCARD			= 1UL << 31,
};
|
||||
|
||||
/*
|
||||
* Compat flags that we support. If any incompat flags are set other than the
|
||||
* ones specified below then we will fail to mount
|
||||
*/
|
||||
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
|
||||
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
|
||||
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
|
||||
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
|
||||
BTRFS_FEATURE_COMPAT_RO_VERITY | \
|
||||
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
|
||||
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
|
||||
|
||||
/*
 * Incompat features supported regardless of the build configuration.
 *
 * This list is shared by both definitions of BTRFS_FEATURE_INCOMPAT_SUPP
 * below, so a newly supported feature is added in exactly one place and the
 * debug and non-debug variants cannot drift apart.
 */
#define BTRFS_FEATURE_INCOMPAT_SUPP_STABLE		\
	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
	 BTRFS_FEATURE_INCOMPAT_NO_HOLES |		\
	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID |		\
	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
	 BTRFS_FEATURE_INCOMPAT_ZONED)

#ifdef CONFIG_BTRFS_DEBUG
/*
 * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
 */
#define BTRFS_FEATURE_INCOMPAT_SUPP			\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE |		\
	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
#else
#define BTRFS_FEATURE_INCOMPAT_SUPP			\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE)
#endif
|
||||
|
||||
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
|
||||
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
|
||||
#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
|
||||
|
||||
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
|
||||
#define BTRFS_DEFAULT_MAX_INLINE (2048)
|
||||
|
||||
struct btrfs_dev_replace {
|
||||
/* See #define above */
|
||||
u64 replace_state;
|
||||
/* Seconds since 1-Jan-1970 */
|
||||
time64_t time_started;
|
||||
/* Seconds since 1-Jan-1970 */
|
||||
time64_t time_stopped;
|
||||
atomic64_t num_write_errors;
|
||||
atomic64_t num_uncorrectable_read_errors;
|
||||
|
||||
u64 cursor_left;
|
||||
u64 committed_cursor_left;
|
||||
u64 cursor_left_last_write_of_item;
|
||||
u64 cursor_right;
|
||||
|
||||
/* See #define above */
|
||||
u64 cont_reading_from_srcdev_mode;
|
||||
|
||||
int is_valid;
|
||||
int item_needs_writeback;
|
||||
struct btrfs_device *srcdev;
|
||||
struct btrfs_device *tgtdev;
|
||||
|
||||
struct mutex lock_finishing_cancel_unmount;
|
||||
struct rw_semaphore rwsem;
|
||||
|
||||
struct btrfs_scrub_progress scrub_progress;
|
||||
|
||||
struct percpu_counter bio_counter;
|
||||
wait_queue_head_t replace_wait;
|
||||
};
|
||||
|
||||
/*
|
||||
* Free clusters are used to claim free space in relatively large chunks,
|
||||
* allowing us to do less seeky writes. They are used for all metadata
|
||||
* allocations. In ssd_spread mode they are also used for data allocations.
|
||||
*/
|
||||
struct btrfs_free_cluster {
|
||||
spinlock_t lock;
|
||||
spinlock_t refill_lock;
|
||||
struct rb_root root;
|
||||
|
||||
/* Largest extent in this cluster */
|
||||
u64 max_size;
|
||||
|
||||
/* First extent starting offset */
|
||||
u64 window_start;
|
||||
|
||||
/* We did a full search and couldn't create a cluster */
|
||||
bool fragmented;
|
||||
|
||||
struct btrfs_block_group *block_group;
|
||||
/*
|
||||
* When a cluster is allocated from a block group, we put the cluster
|
||||
* onto a list in the block group so that it can be freed before the
|
||||
* block group is freed.
|
||||
*/
|
||||
struct list_head block_group_list;
|
||||
};
|
||||
|
||||
/* Discard control. */
|
||||
/*
|
||||
* Async discard uses multiple lists to differentiate the discard filter
|
||||
* parameters. Index 0 is for completely free block groups where we need to
|
||||
* ensure the entire block group is trimmed without being lossy. Indices
|
||||
* afterwards represent monotonically decreasing discard filter sizes to
|
||||
* prioritize what should be discarded next.
|
||||
*/
|
||||
#define BTRFS_NR_DISCARD_LISTS 3
|
||||
#define BTRFS_DISCARD_INDEX_UNUSED 0
|
||||
#define BTRFS_DISCARD_INDEX_START 1
|
||||
|
||||
struct btrfs_discard_ctl {
|
||||
struct workqueue_struct *discard_workers;
|
||||
struct delayed_work work;
|
||||
spinlock_t lock;
|
||||
struct btrfs_block_group *block_group;
|
||||
struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
|
||||
u64 prev_discard;
|
||||
u64 prev_discard_time;
|
||||
atomic_t discardable_extents;
|
||||
atomic64_t discardable_bytes;
|
||||
u64 max_discard_size;
|
||||
u64 delay_ms;
|
||||
u32 iops_limit;
|
||||
u32 kbps_limit;
|
||||
u64 discard_extent_bytes;
|
||||
u64 discard_bitmap_bytes;
|
||||
atomic64_t discard_bytes_saved;
|
||||
};
|
||||
|
||||
/*
 * Exclusive operations (device replace, resize, device add/remove, balance).
 * BTRFS_EXCLOP_NONE is the zero value.
 */
enum btrfs_exclusive_operation {
	BTRFS_EXCLOP_NONE = 0,
	BTRFS_EXCLOP_BALANCE_PAUSED,
	BTRFS_EXCLOP_BALANCE,
	BTRFS_EXCLOP_DEV_ADD,
	BTRFS_EXCLOP_DEV_REMOVE,
	BTRFS_EXCLOP_DEV_REPLACE,
	BTRFS_EXCLOP_RESIZE,
	BTRFS_EXCLOP_SWAP_ACTIVATE,
};
|
||||
|
||||
/* Store data about transaction commits, exported via sysfs. */
|
||||
struct btrfs_commit_stats {
|
||||
/* Total number of commits */
|
||||
u64 commit_count;
|
||||
/* The maximum commit duration so far in ns */
|
||||
u64 max_commit_dur;
|
||||
/* The last commit duration in ns */
|
||||
u64 last_commit_dur;
|
||||
/* The total commit duration in ns */
|
||||
u64 total_commit_dur;
|
||||
};
|
||||
|
||||
struct btrfs_fs_info {
|
||||
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
|
||||
unsigned long flags;
|
||||
struct btrfs_root *tree_root;
|
||||
struct btrfs_root *chunk_root;
|
||||
struct btrfs_root *dev_root;
|
||||
struct btrfs_root *fs_root;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_root *uuid_root;
|
||||
struct btrfs_root *data_reloc_root;
|
||||
struct btrfs_root *block_group_root;
|
||||
|
||||
/* The log root tree is a directory of all the other log roots */
|
||||
struct btrfs_root *log_root_tree;
|
||||
|
||||
/* The tree that holds the global roots (csum, extent, etc) */
|
||||
rwlock_t global_root_lock;
|
||||
struct rb_root global_root_tree;
|
||||
|
||||
spinlock_t fs_roots_radix_lock;
|
||||
struct radix_tree_root fs_roots_radix;
|
||||
|
||||
/* Block group cache stuff */
|
||||
rwlock_t block_group_cache_lock;
|
||||
struct rb_root_cached block_group_cache_tree;
|
||||
|
||||
/* Keep track of unallocated space */
|
||||
atomic64_t free_chunk_space;
|
||||
|
||||
/* Track ranges which are used by log trees blocks/logged data extents */
|
||||
struct extent_io_tree excluded_extents;
|
||||
|
||||
/* logical->physical extent mapping */
|
||||
struct extent_map_tree mapping_tree;
|
||||
|
||||
/*
|
||||
* Block reservation for extent, checksum, root tree and delayed dir
|
||||
* index item.
|
||||
*/
|
||||
struct btrfs_block_rsv global_block_rsv;
|
||||
/* Block reservation for metadata operations */
|
||||
struct btrfs_block_rsv trans_block_rsv;
|
||||
/* Block reservation for chunk tree */
|
||||
struct btrfs_block_rsv chunk_block_rsv;
|
||||
/* Block reservation for delayed operations */
|
||||
struct btrfs_block_rsv delayed_block_rsv;
|
||||
/* Block reservation for delayed refs */
|
||||
struct btrfs_block_rsv delayed_refs_rsv;
|
||||
|
||||
struct btrfs_block_rsv empty_block_rsv;
|
||||
|
||||
u64 generation;
|
||||
u64 last_trans_committed;
|
||||
/*
|
||||
* Generation of the last transaction used for block group relocation
|
||||
* since the filesystem was last mounted (or 0 if none happened yet).
|
||||
* Must be written and read while holding btrfs_fs_info::commit_root_sem.
|
||||
*/
|
||||
u64 last_reloc_trans;
|
||||
u64 avg_delayed_ref_runtime;
|
||||
|
||||
/*
|
||||
* This is updated to the current trans every time a full commit is
|
||||
* required instead of the faster short fsync log commits
|
||||
*/
|
||||
u64 last_trans_log_full_commit;
|
||||
unsigned long mount_opt;
|
||||
|
||||
unsigned long compress_type:4;
|
||||
unsigned int compress_level;
|
||||
u32 commit_interval;
|
||||
/*
|
||||
* It is a suggestive number, the read side is safe even it gets a
|
||||
* wrong number because we will write out the data into a regular
|
||||
* extent. The write side(mount/remount) is under ->s_umount lock,
|
||||
* so it is also safe.
|
||||
*/
|
||||
u64 max_inline;
|
||||
|
||||
struct btrfs_transaction *running_transaction;
|
||||
wait_queue_head_t transaction_throttle;
|
||||
wait_queue_head_t transaction_wait;
|
||||
wait_queue_head_t transaction_blocked_wait;
|
||||
wait_queue_head_t async_submit_wait;
|
||||
|
||||
/*
|
||||
* Used to protect the incompat_flags, compat_flags, compat_ro_flags
|
||||
* when they are updated.
|
||||
*
|
||||
* Because we do not clear the flags for ever, so we needn't use
|
||||
* the lock on the read side.
|
||||
*
|
||||
* We also needn't use the lock when we mount the fs, because
|
||||
* there is no other task which will update the flag.
|
||||
*/
|
||||
spinlock_t super_lock;
|
||||
struct btrfs_super_block *super_copy;
|
||||
struct btrfs_super_block *super_for_commit;
|
||||
struct super_block *sb;
|
||||
struct inode *btree_inode;
|
||||
struct mutex tree_log_mutex;
|
||||
struct mutex transaction_kthread_mutex;
|
||||
struct mutex cleaner_mutex;
|
||||
struct mutex chunk_mutex;
|
||||
|
||||
/*
|
||||
* This is taken to make sure we don't set block groups ro after the
|
||||
* free space cache has been allocated on them.
|
||||
*/
|
||||
struct mutex ro_block_group_mutex;
|
||||
|
||||
/*
|
||||
* This is used during read/modify/write to make sure no two ios are
|
||||
* trying to mod the same stripe at the same time.
|
||||
*/
|
||||
struct btrfs_stripe_hash_table *stripe_hash_table;
|
||||
|
||||
/*
|
||||
* This protects the ordered operations list only while we are
|
||||
* processing all of the entries on it. This way we make sure the
|
||||
* commit code doesn't find the list temporarily empty because another
|
||||
* function happens to be doing non-waiting preflush before jumping
|
||||
* into the main commit.
|
||||
*/
|
||||
struct mutex ordered_operations_mutex;
|
||||
|
||||
struct rw_semaphore commit_root_sem;
|
||||
|
||||
struct rw_semaphore cleanup_work_sem;
|
||||
|
||||
struct rw_semaphore subvol_sem;
|
||||
|
||||
spinlock_t trans_lock;
|
||||
/*
|
||||
* The reloc mutex goes with the trans lock, it is taken during commit
|
||||
* to protect us from the relocation code.
|
||||
*/
|
||||
struct mutex reloc_mutex;
|
||||
|
||||
struct list_head trans_list;
|
||||
struct list_head dead_roots;
|
||||
struct list_head caching_block_groups;
|
||||
|
||||
spinlock_t delayed_iput_lock;
|
||||
struct list_head delayed_iputs;
|
||||
atomic_t nr_delayed_iputs;
|
||||
wait_queue_head_t delayed_iputs_wait;
|
||||
|
||||
atomic64_t tree_mod_seq;
|
||||
|
||||
/* This protects tree_mod_log and tree_mod_seq_list */
|
||||
rwlock_t tree_mod_log_lock;
|
||||
struct rb_root tree_mod_log;
|
||||
struct list_head tree_mod_seq_list;
|
||||
|
||||
atomic_t async_delalloc_pages;
|
||||
|
||||
/* This is used to protect the following list -- ordered_roots. */
|
||||
spinlock_t ordered_root_lock;
|
||||
|
||||
/*
|
||||
* All fs/file tree roots in which there are data=ordered extents
|
||||
* pending writeback are added into this list.
|
||||
*
|
||||
* These can span multiple transactions and basically include every
|
||||
* dirty data page that isn't from nodatacow.
|
||||
*/
|
||||
struct list_head ordered_roots;
|
||||
|
||||
struct mutex delalloc_root_mutex;
|
||||
spinlock_t delalloc_root_lock;
|
||||
/* All fs/file tree roots that have delalloc inodes. */
|
||||
struct list_head delalloc_roots;
|
||||
|
||||
/*
|
||||
* There is a pool of worker threads for checksumming during writes and
|
||||
* a pool for checksumming after reads. This is because readers can
|
||||
* run with FS locks held, and the writers may be waiting for those
|
||||
* locks. We don't want ordering in the pending list to cause
|
||||
* deadlocks, and so the two are serviced separately.
|
||||
*
|
||||
* A third pool does submit_bio to avoid deadlocking with the other two.
|
||||
*/
|
||||
struct btrfs_workqueue *workers;
|
||||
struct btrfs_workqueue *hipri_workers;
|
||||
struct btrfs_workqueue *delalloc_workers;
|
||||
struct btrfs_workqueue *flush_workers;
|
||||
struct workqueue_struct *endio_workers;
|
||||
struct workqueue_struct *endio_meta_workers;
|
||||
struct workqueue_struct *rmw_workers;
|
||||
struct workqueue_struct *compressed_write_workers;
|
||||
struct btrfs_workqueue *endio_write_workers;
|
||||
struct btrfs_workqueue *endio_freespace_worker;
|
||||
struct btrfs_workqueue *caching_workers;
|
||||
|
||||
/*
|
||||
* Fixup workers take dirty pages that didn't properly go through the
|
||||
* cow mechanism and make them safe to write. It happens for the
|
||||
* sys_munmap function call path.
|
||||
*/
|
||||
struct btrfs_workqueue *fixup_workers;
|
||||
struct btrfs_workqueue *delayed_workers;
|
||||
|
||||
struct task_struct *transaction_kthread;
|
||||
struct task_struct *cleaner_kthread;
|
||||
u32 thread_pool_size;
|
||||
|
||||
struct kobject *space_info_kobj;
|
||||
struct kobject *qgroups_kobj;
|
||||
struct kobject *discard_kobj;
|
||||
|
||||
/* Used to keep from writing metadata until there is a nice batch */
|
||||
struct percpu_counter dirty_metadata_bytes;
|
||||
struct percpu_counter delalloc_bytes;
|
||||
struct percpu_counter ordered_bytes;
|
||||
s32 dirty_metadata_batch;
|
||||
s32 delalloc_batch;
|
||||
|
||||
struct list_head dirty_cowonly_roots;
|
||||
|
||||
struct btrfs_fs_devices *fs_devices;
|
||||
|
||||
/*
|
||||
* The space_info list is effectively read only after initial setup.
|
||||
* It is populated at mount time and cleaned up after all block groups
|
||||
* are removed. RCU is used to protect it.
|
||||
*/
|
||||
struct list_head space_info;
|
||||
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
|
||||
struct reloc_control *reloc_ctl;
|
||||
|
||||
/* data_alloc_cluster is only used in ssd_spread mode */
|
||||
struct btrfs_free_cluster data_alloc_cluster;
|
||||
|
||||
/* All metadata allocations go through this cluster. */
|
||||
struct btrfs_free_cluster meta_alloc_cluster;
|
||||
|
||||
/* Auto defrag inodes go here. */
|
||||
spinlock_t defrag_inodes_lock;
|
||||
struct rb_root defrag_inodes;
|
||||
atomic_t defrag_running;
|
||||
|
||||
/* Used to protect avail_{data, metadata, system}_alloc_bits */
|
||||
seqlock_t profiles_lock;
|
||||
/*
|
||||
* These three are in extended format (availability of single chunks is
|
||||
* denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted
|
||||
* by corresponding BTRFS_BLOCK_GROUP_* bits)
|
||||
*/
|
||||
u64 avail_data_alloc_bits;
|
||||
u64 avail_metadata_alloc_bits;
|
||||
u64 avail_system_alloc_bits;
|
||||
|
||||
/* Balance state */
|
||||
spinlock_t balance_lock;
|
||||
struct mutex balance_mutex;
|
||||
atomic_t balance_pause_req;
|
||||
atomic_t balance_cancel_req;
|
||||
struct btrfs_balance_control *balance_ctl;
|
||||
wait_queue_head_t balance_wait_q;
|
||||
|
||||
/* Cancellation requests for chunk relocation */
|
||||
atomic_t reloc_cancel_req;
|
||||
|
||||
u32 data_chunk_allocations;
|
||||
u32 metadata_ratio;
|
||||
|
||||
void *bdev_holder;
|
||||
|
||||
/* Private scrub information */
|
||||
struct mutex scrub_lock;
|
||||
atomic_t scrubs_running;
|
||||
atomic_t scrub_pause_req;
|
||||
atomic_t scrubs_paused;
|
||||
atomic_t scrub_cancel_req;
|
||||
wait_queue_head_t scrub_pause_wait;
|
||||
/*
|
||||
* The worker pointers are NULL iff the refcount is 0, ie. scrub is not
|
||||
* running.
|
||||
*/
|
||||
refcount_t scrub_workers_refcnt;
|
||||
struct workqueue_struct *scrub_workers;
|
||||
struct workqueue_struct *scrub_wr_completion_workers;
|
||||
struct workqueue_struct *scrub_parity_workers;
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
struct btrfs_discard_ctl discard_ctl;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
u32 check_integrity_print_mask;
|
||||
#endif
|
||||
/* Is qgroup tracking in a consistent state? */
|
||||
u64 qgroup_flags;
|
||||
|
||||
/* Holds configuration and tracking. Protected by qgroup_lock. */
|
||||
struct rb_root qgroup_tree;
|
||||
spinlock_t qgroup_lock;
|
||||
|
||||
/*
|
||||
* Used to avoid frequently calling ulist_alloc()/ulist_free()
|
||||
* when doing qgroup accounting, it must be protected by qgroup_lock.
|
||||
*/
|
||||
struct ulist *qgroup_ulist;
|
||||
|
||||
/*
|
||||
* Protect user change for quota operations. If a transaction is needed,
|
||||
* it must be started before locking this lock.
|
||||
*/
|
||||
struct mutex qgroup_ioctl_lock;
|
||||
|
||||
/* List of dirty qgroups to be written at next commit. */
|
||||
struct list_head dirty_qgroups;
|
||||
|
||||
/* Used by qgroup for an efficient tree traversal. */
|
||||
u64 qgroup_seq;
|
||||
|
||||
/* Qgroup rescan items. */
|
||||
/* Protects the progress item */
|
||||
struct mutex qgroup_rescan_lock;
|
||||
struct btrfs_key qgroup_rescan_progress;
|
||||
struct btrfs_workqueue *qgroup_rescan_workers;
|
||||
struct completion qgroup_rescan_completion;
|
||||
struct btrfs_work qgroup_rescan_work;
|
||||
/* Protected by qgroup_rescan_lock */
|
||||
bool qgroup_rescan_running;
|
||||
u8 qgroup_drop_subtree_thres;
|
||||
|
||||
/* Filesystem state */
|
||||
unsigned long fs_state;
|
||||
|
||||
struct btrfs_delayed_root *delayed_root;
|
||||
|
||||
/* Extent buffer radix tree */
|
||||
spinlock_t buffer_lock;
|
||||
/* Entries are eb->start / sectorsize */
|
||||
struct radix_tree_root buffer_radix;
|
||||
|
||||
/* Next backup root to be overwritten */
|
||||
int backup_root_index;
|
||||
|
||||
/* Device replace state */
|
||||
struct btrfs_dev_replace dev_replace;
|
||||
|
||||
struct semaphore uuid_tree_rescan_sem;
|
||||
|
||||
/* Used to reclaim the metadata space in the background. */
|
||||
struct work_struct async_reclaim_work;
|
||||
struct work_struct async_data_reclaim_work;
|
||||
struct work_struct preempt_reclaim_work;
|
||||
|
||||
/* Reclaim partially filled block groups in the background */
|
||||
struct work_struct reclaim_bgs_work;
|
||||
struct list_head reclaim_bgs;
|
||||
int bg_reclaim_threshold;
|
||||
|
||||
spinlock_t unused_bgs_lock;
|
||||
struct list_head unused_bgs;
|
||||
struct mutex unused_bg_unpin_mutex;
|
||||
/* Protect block groups that are going to be deleted */
|
||||
struct mutex reclaim_bgs_lock;
|
||||
|
||||
/* Cached block sizes */
|
||||
u32 nodesize;
|
||||
u32 sectorsize;
|
||||
/* ilog2 of sectorsize, use to avoid 64bit division */
|
||||
u32 sectorsize_bits;
|
||||
u32 csum_size;
|
||||
u32 csums_per_leaf;
|
||||
u32 stripesize;
|
||||
|
||||
/*
|
||||
* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
|
||||
* filesystem, on zoned it depends on the device constraints.
|
||||
*/
|
||||
u64 max_extent_size;
|
||||
|
||||
/* Block groups and devices containing active swapfiles. */
|
||||
spinlock_t swapfile_pins_lock;
|
||||
struct rb_root swapfile_pins;
|
||||
|
||||
struct crypto_shash *csum_shash;
|
||||
|
||||
/* Type of exclusive operation running, protected by super_lock */
|
||||
enum btrfs_exclusive_operation exclusive_operation;
|
||||
|
||||
/*
|
||||
* Zone size > 0 when in ZONED mode, otherwise it's used for a check
|
||||
* if the mode is enabled
|
||||
*/
|
||||
u64 zone_size;
|
||||
|
||||
/* Max size to emit ZONE_APPEND write command */
|
||||
u64 max_zone_append_size;
|
||||
struct mutex zoned_meta_io_lock;
|
||||
spinlock_t treelog_bg_lock;
|
||||
u64 treelog_bg;
|
||||
|
||||
/*
|
||||
* Start of the dedicated data relocation block group, protected by
|
||||
* relocation_bg_lock.
|
||||
*/
|
||||
spinlock_t relocation_bg_lock;
|
||||
u64 data_reloc_bg;
|
||||
struct mutex zoned_data_reloc_io_lock;
|
||||
|
||||
u64 nr_global_roots;
|
||||
|
||||
spinlock_t zone_active_bgs_lock;
|
||||
struct list_head zone_active_bgs;
|
||||
|
||||
/* Updates are not protected by any lock */
|
||||
struct btrfs_commit_stats commit_stats;
|
||||
|
||||
/*
|
||||
* Last generation where we dropped a non-relocation root.
|
||||
* Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
|
||||
* to change it and to read it, respectively.
|
||||
*/
|
||||
u64 last_root_drop_gen;
|
||||
|
||||
/*
|
||||
* Annotations for transaction events (structures are empty when
|
||||
* compiled without lockdep).
|
||||
*/
|
||||
struct lockdep_map btrfs_trans_num_writers_map;
|
||||
struct lockdep_map btrfs_trans_num_extwriters_map;
|
||||
struct lockdep_map btrfs_state_change_map[4];
|
||||
struct lockdep_map btrfs_trans_pending_ordered_map;
|
||||
struct lockdep_map btrfs_ordered_extent_map;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
struct rb_root block_tree;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct kobject *debug_kobj;
|
||||
struct list_head allocated_roots;
|
||||
|
||||
spinlock_t eb_leak_lock;
|
||||
struct list_head allocated_ebs;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
|
||||
u64 gen)
|
||||
{
|
||||
WRITE_ONCE(fs_info->last_root_drop_gen, gen);
|
||||
}
|
||||
|
||||
static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return READ_ONCE(fs_info->last_root_drop_gen);
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the number of bytes to be checksummed and figure out how many leaves
|
||||
* it would require to store the csums for that many bytes.
|
||||
*/
|
||||
static inline u64 btrfs_csum_bytes_to_leaves(
|
||||
const struct btrfs_fs_info *fs_info, u64 csum_bytes)
|
||||
{
|
||||
const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
|
||||
|
||||
return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use this if we would be adding new items, as we could split nodes as we cow
|
||||
* down the tree.
|
||||
*/
|
||||
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
|
||||
}
|
||||
|
||||
/*
|
||||
* Doing a truncate or a modification won't result in new nodes or leaves, just
|
||||
* what we need for COW.
|
||||
*/
|
||||
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
|
||||
}
|
||||
|
||||
/*
 * Upper bound for an extent item: one sixteenth of the leaf data area of the
 * filesystem that root @r belongs to, minus the size of one item header.
 *
 * @r is parenthesized so that any pointer expression can be passed.
 */
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE((r)->fs_info) >> 4) - \
					sizeof(struct btrfs_item))
|
||||
|
||||
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return fs_info->zone_size > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Count how many fs_info->max_extent_size cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (!fs_info)
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
#endif
|
||||
|
||||
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
|
||||
}
|
||||
|
||||
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type);
|
||||
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type);
|
||||
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation op);
|
||||
|
||||
/* Compatibility and incompatibility defines */
|
||||
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name);
|
||||
void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name);
|
||||
void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name);
|
||||
void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
const char *name);
|
||||
|
||||
/*
 * Raw feature tests: evaluate to 1 if any bit of @flags is set in the
 * incompat / compat_ro flags of the in-memory super block copy, else 0.
 */
#define __btrfs_fs_incompat(fs_info, flags)				\
	(!!(btrfs_super_incompat_flags((fs_info)->super_copy) & (flags)))

#define __btrfs_fs_compat_ro(fs_info, flags)				\
	(!!(btrfs_super_compat_ro_flags((fs_info)->super_copy) & (flags)))

/*
 * Incompat feature helpers taking the short feature name @opt; the name is
 * pasted onto BTRFS_FEATURE_INCOMPAT_ and also passed along as a string.
 */
#define btrfs_set_fs_incompat(__fs_info, opt)				\
	__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt)

#define btrfs_clear_fs_incompat(__fs_info, opt)				\
	__btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt)

#define btrfs_fs_incompat(fs_info, opt)					\
	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)

/* Same helpers for the BTRFS_FEATURE_COMPAT_RO_ feature bits. */
#define btrfs_set_fs_compat_ro(__fs_info, opt)				\
	__btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt)

#define btrfs_clear_fs_compat_ro(__fs_info, opt)			\
	__btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt)

#define btrfs_fs_compat_ro(fs_info, opt)				\
	__btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
|
||||
|
||||
/*
 * Mount option helpers. @opt is the short option name, pasted onto
 * BTRFS_MOUNT_. The clear/set/raw_test variants operate on an option word
 * @o directly; btrfs_test_opt() tests fs_info->mount_opt.
 */
#define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
#define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt)
#define btrfs_test_opt(fs_info, opt)	((fs_info)->mount_opt & \
					 BTRFS_MOUNT_##opt)
|
||||
|
||||
/*
 * Set mount option @opt on @fs_info. The info message (@fmt, @args) is only
 * printed when the option was not already set.
 *
 * @fs_info is parenthesized before the member access so that any pointer
 * expression can be passed.
 */
#define btrfs_set_and_info(fs_info, opt, fmt, args...)			\
do {									\
	if (!btrfs_test_opt(fs_info, opt))				\
		btrfs_info(fs_info, fmt, ##args);			\
	btrfs_set_opt((fs_info)->mount_opt, opt);			\
} while (0)
|
||||
|
||||
/*
 * Clear mount option @opt on @fs_info. The info message (@fmt, @args) is only
 * printed when the option was set beforehand.
 *
 * @fs_info is parenthesized before the member access so that any pointer
 * expression can be passed.
 */
#define btrfs_clear_and_info(fs_info, opt, fmt, args...)		\
do {									\
	if (btrfs_test_opt(fs_info, opt))				\
		btrfs_info(fs_info, fmt, ##args);			\
	btrfs_clear_opt((fs_info)->mount_opt, opt);			\
} while (0)
|
||||
|
||||
static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/* Do it this way so we only ever do one test_bit in the normal case. */
|
||||
if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) {
|
||||
if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags))
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we remount the fs to be R/O or umount the fs, the cleaner needn't do
|
||||
* anything except sleeping. This function is used to check the status of
|
||||
* the fs.
|
||||
* We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
|
||||
* since setting and checking for SB_RDONLY in the superblock's flags is not
|
||||
* atomic.
|
||||
*/
|
||||
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
|
||||
btrfs_fs_closing(fs_info);
|
||||
}
|
||||
|
||||
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
|
||||
}
|
||||
|
||||
/*
 * Tests for the error bits in fs_info->fs_state. Both are hinted as unlikely
 * to the compiler.
 */
#define BTRFS_FS_ERROR(fs_info)						\
	(unlikely(test_bit(BTRFS_FS_STATE_ERROR,			\
			   &(fs_info)->fs_state)))

#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)				\
	(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,		\
			   &(fs_info)->fs_state)))
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS

/* Sanity-test builds: EXPORT_FOR_TESTS expands to nothing. */
#define EXPORT_FOR_TESTS

/* Non-zero when @fs_info has BTRFS_FS_STATE_DUMMY_FS_INFO set. */
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
	return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
}

void btrfs_test_destroy_inode(struct inode *inode);

#else

/* Regular builds: EXPORT_FOR_TESTS expands to `static`. */
#define EXPORT_FOR_TESTS static

/* Without the sanity tests no fs_info is ever a test one. */
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
{
	return 0;
}

#endif
|
||||
|
||||
#endif
|
|
@ -4,14 +4,20 @@
|
|||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "inode-item.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "print-tree.h"
|
||||
#include "space-info.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "file-item.h"
|
||||
|
||||
struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
|
||||
int slot, const char *name,
|
||||
int name_len)
|
||||
int slot,
|
||||
const struct fscrypt_str *name)
|
||||
{
|
||||
struct btrfs_inode_ref *ref;
|
||||
unsigned long ptr;
|
||||
|
@ -27,9 +33,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
|
|||
len = btrfs_inode_ref_name_len(leaf, ref);
|
||||
name_ptr = (unsigned long)(ref + 1);
|
||||
cur_offset += len + sizeof(*ref);
|
||||
if (len != name_len)
|
||||
if (len != name->len)
|
||||
continue;
|
||||
if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
|
||||
if (memcmp_extent_buffer(leaf, name->name, name_ptr,
|
||||
name->len) == 0)
|
||||
return ref;
|
||||
}
|
||||
return NULL;
|
||||
|
@ -37,7 +44,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
|
|||
|
||||
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
|
||||
struct extent_buffer *leaf, int slot, u64 ref_objectid,
|
||||
const char *name, int name_len)
|
||||
const struct fscrypt_str *name)
|
||||
{
|
||||
struct btrfs_inode_extref *extref;
|
||||
unsigned long ptr;
|
||||
|
@ -60,9 +67,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
|
|||
name_ptr = (unsigned long)(&extref->name);
|
||||
ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
|
||||
|
||||
if (ref_name_len == name_len &&
|
||||
if (ref_name_len == name->len &&
|
||||
btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
|
||||
(memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0))
|
||||
(memcmp_extent_buffer(leaf, name->name, name_ptr,
|
||||
name->len) == 0))
|
||||
return extref;
|
||||
|
||||
cur_offset += ref_name_len + sizeof(*extref);
|
||||
|
@ -75,7 +83,7 @@ struct btrfs_inode_extref *
|
|||
btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const char *name, int name_len,
|
||||
const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, int ins_len,
|
||||
int cow)
|
||||
{
|
||||
|
@ -84,7 +92,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
|
|||
|
||||
key.objectid = inode_objectid;
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
|
||||
if (ret < 0)
|
||||
|
@ -92,13 +100,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
|
|||
if (ret > 0)
|
||||
return NULL;
|
||||
return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
|
||||
ref_objectid, name, name_len);
|
||||
ref_objectid, name);
|
||||
|
||||
}
|
||||
|
||||
static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid,
|
||||
u64 *index)
|
||||
{
|
||||
|
@ -107,14 +115,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode_extref *extref;
|
||||
struct extent_buffer *leaf;
|
||||
int ret;
|
||||
int del_len = name_len + sizeof(*extref);
|
||||
int del_len = name->len + sizeof(*extref);
|
||||
unsigned long ptr;
|
||||
unsigned long item_start;
|
||||
u32 item_size;
|
||||
|
||||
key.objectid = inode_objectid;
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
|
@ -132,7 +140,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
|||
* readonly.
|
||||
*/
|
||||
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
|
||||
ref_objectid, name, name_len);
|
||||
ref_objectid, name);
|
||||
if (!extref) {
|
||||
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
|
||||
ret = -EROFS;
|
||||
|
@ -168,8 +176,7 @@ out:
|
|||
}
|
||||
|
||||
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_root *root, const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 *index)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
|
@ -182,7 +189,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
|||
u32 sub_item_len;
|
||||
int ret;
|
||||
int search_ext_refs = 0;
|
||||
int del_len = name_len + sizeof(*ref);
|
||||
int del_len = name->len + sizeof(*ref);
|
||||
|
||||
key.objectid = inode_objectid;
|
||||
key.offset = ref_objectid;
|
||||
|
@ -201,8 +208,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
|||
goto out;
|
||||
}
|
||||
|
||||
ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name,
|
||||
name_len);
|
||||
ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name);
|
||||
if (!ref) {
|
||||
ret = -ENOENT;
|
||||
search_ext_refs = 1;
|
||||
|
@ -219,7 +225,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
|||
goto out;
|
||||
}
|
||||
ptr = (unsigned long)ref;
|
||||
sub_item_len = name_len + sizeof(*ref);
|
||||
sub_item_len = name->len + sizeof(*ref);
|
||||
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
|
||||
item_size - (ptr + sub_item_len - item_start));
|
||||
|
@ -233,7 +239,7 @@ out:
|
|||
* name in our ref array. Find and remove the extended
|
||||
* inode ref then.
|
||||
*/
|
||||
return btrfs_del_inode_extref(trans, root, name, name_len,
|
||||
return btrfs_del_inode_extref(trans, root, name,
|
||||
inode_objectid, ref_objectid, index);
|
||||
}
|
||||
|
||||
|
@ -247,12 +253,13 @@ out:
|
|||
*/
|
||||
static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 index)
|
||||
const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_inode_extref *extref;
|
||||
int ret;
|
||||
int ins_len = name_len + sizeof(*extref);
|
||||
int ins_len = name->len + sizeof(*extref);
|
||||
unsigned long ptr;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
|
@ -260,7 +267,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
|
|||
|
||||
key.objectid = inode_objectid;
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
|
||||
key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
|
@ -272,7 +279,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
|
|||
if (btrfs_find_name_in_ext_backref(path->nodes[0],
|
||||
path->slots[0],
|
||||
ref_objectid,
|
||||
name, name_len))
|
||||
name))
|
||||
goto out;
|
||||
|
||||
btrfs_extend_item(path, ins_len);
|
||||
|
@ -286,12 +293,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
|
|||
ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len;
|
||||
extref = (struct btrfs_inode_extref *)ptr;
|
||||
|
||||
btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
|
||||
btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len);
|
||||
btrfs_set_inode_extref_index(path->nodes[0], extref, index);
|
||||
btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
|
||||
|
||||
ptr = (unsigned long)&extref->name;
|
||||
write_extent_buffer(path->nodes[0], name, ptr, name_len);
|
||||
write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
|
||||
out:
|
||||
|
@ -301,8 +308,7 @@ out:
|
|||
|
||||
/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
|
||||
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_root *root, const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 index)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
@ -311,7 +317,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_inode_ref *ref;
|
||||
unsigned long ptr;
|
||||
int ret;
|
||||
int ins_len = name_len + sizeof(*ref);
|
||||
int ins_len = name->len + sizeof(*ref);
|
||||
|
||||
key.objectid = inode_objectid;
|
||||
key.offset = ref_objectid;
|
||||
|
@ -327,7 +333,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
if (ret == -EEXIST) {
|
||||
u32 old_size;
|
||||
ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
|
||||
name, name_len);
|
||||
name);
|
||||
if (ref)
|
||||
goto out;
|
||||
|
||||
|
@ -336,7 +342,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_inode_ref);
|
||||
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
|
||||
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
ret = 0;
|
||||
|
@ -344,7 +350,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
if (ret == -EOVERFLOW) {
|
||||
if (btrfs_find_name_in_backref(path->nodes[0],
|
||||
path->slots[0],
|
||||
name, name_len))
|
||||
name))
|
||||
ret = -EEXIST;
|
||||
else
|
||||
ret = -EMLINK;
|
||||
|
@ -353,11 +359,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
|||
} else {
|
||||
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_inode_ref);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
|
||||
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
|
||||
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
}
|
||||
write_extent_buffer(path->nodes[0], name, ptr, name_len);
|
||||
write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
|
||||
out:
|
||||
|
@ -370,7 +376,6 @@ out:
|
|||
if (btrfs_super_incompat_flags(disk_super)
|
||||
& BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
|
||||
ret = btrfs_insert_inode_extref(trans, root, name,
|
||||
name_len,
|
||||
inode_objectid,
|
||||
ref_objectid, index);
|
||||
}
|
||||
|
|
|
@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root,
|
||||
struct btrfs_truncate_control *control);
|
||||
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_root *root, const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 index);
|
||||
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_root *root, const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, u64 *index);
|
||||
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid);
|
||||
int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_path *path,
|
||||
int btrfs_lookup_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_path *path,
|
||||
struct btrfs_key *location, int mod);
|
||||
|
||||
struct btrfs_inode_extref *btrfs_lookup_inode_extref(
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const char *name, int name_len,
|
||||
const struct fscrypt_str *name,
|
||||
u64 inode_objectid, u64 ref_objectid, int ins_len,
|
||||
int cow);
|
||||
|
||||
struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
|
||||
int slot, const char *name,
|
||||
int name_len);
|
||||
int slot,
|
||||
const struct fscrypt_str *name);
|
||||
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
|
||||
struct extent_buffer *leaf, int slot, u64 ref_objectid,
|
||||
const char *name, int name_len);
|
||||
const struct fscrypt_str *name);
|
||||
|
||||
#endif
|
||||
|
|
898
fs/btrfs/inode.c
898
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
945
fs/btrfs/ioctl.c
945
fs/btrfs/ioctl.c
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,17 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_IOCTL_H
|
||||
#define BTRFS_IOCTL_H
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
|
||||
int btrfs_fileattr_set(struct user_namespace *mnt_userns,
|
||||
struct dentry *dentry, struct fileattr *fa);
|
||||
int btrfs_ioctl_get_supported_features(void __user *arg);
|
||||
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
|
||||
int __pure btrfs_is_empty_uuid(u8 *uuid);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
|
||||
#endif
|
|
@ -12,6 +12,7 @@
|
|||
#include "ctree.h"
|
||||
#include "extent_io.h"
|
||||
#include "locking.h"
|
||||
#include "accessors.h"
|
||||
|
||||
/*
|
||||
* Lockdep class keys for extent_buffer->lock's in this root. For a given
|
||||
|
|
|
@ -78,6 +78,82 @@ enum btrfs_lock_nesting {
|
|||
BTRFS_NESTING_MAX,
|
||||
};
|
||||
|
||||
/*
 * Transaction states that have a lockdep map of their own.
 *
 * NOTE(review): these look like the index values passed as @i / @state to
 * the btrfs_state_change_map[] macros -- confirm against the callers.
 */
enum btrfs_lockdep_trans_states {
	BTRFS_LOCKDEP_TRANS_COMMIT_START,
	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
	BTRFS_LOCKDEP_TRANS_COMPLETED,
};
|
||||
|
||||
/*
 * Lockdep annotation for wait events.
 *
 * @owner:  The struct where the lockdep map is defined
 * @lock:   The lockdep map corresponding to a wait event
 *
 * This macro is used to annotate a wait event. In this case a thread acquires
 * the lockdep map as writer (exclusive lock) because it has to block until all
 * the threads that hold the lock as readers signal the condition for the wait
 * event and release their locks.
 *
 * The map is acquired and released immediately. @owner is parenthesized so
 * that any pointer expression can be passed.
 */
#define btrfs_might_wait_for_event(owner, lock)					\
	do {									\
		rwsem_acquire(&(owner)->lock##_map, 0, 0, _THIS_IP_);		\
		rwsem_release(&(owner)->lock##_map, _THIS_IP_);			\
	} while (0)
|
||||
|
||||
/*
 * Protection for the resource/condition of a wait event.
 *
 * @owner:  The struct where the lockdep map is defined
 * @lock:   The lockdep map corresponding to a wait event
 *
 * Many threads can modify the condition for the wait event at the same time
 * and signal the threads that block on the wait event. The threads that modify
 * the condition and do the signaling acquire the lock as readers (shared
 * lock).
 *
 * @owner is parenthesized so that any pointer expression can be passed.
 */
#define btrfs_lockdep_acquire(owner, lock)					\
	rwsem_acquire_read(&(owner)->lock##_map, 0, 0, _THIS_IP_)
|
||||
|
||||
/*
 * Used after signaling the condition for a wait event to release the lockdep
 * map held by a reader thread.
 *
 * @owner is parenthesized so that any pointer expression can be passed.
 */
#define btrfs_lockdep_release(owner, lock)					\
	rwsem_release(&(owner)->lock##_map, _THIS_IP_)
|
||||
|
||||
/*
 * Macros for the transaction states wait events, similar to the generic wait
 * event macros, but operating on entry @i of @owner's
 * btrfs_state_change_map[] array instead of a named map.
 *
 * @owner is parenthesized so that any pointer expression can be passed.
 */
#define btrfs_might_wait_for_state(owner, i)					\
	do {									\
		rwsem_acquire(&(owner)->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \
		rwsem_release(&(owner)->btrfs_state_change_map[i], _THIS_IP_);	\
	} while (0)

#define btrfs_trans_state_lockdep_acquire(owner, i)				\
	rwsem_acquire_read(&(owner)->btrfs_state_change_map[i], 0, 0, _THIS_IP_)

#define btrfs_trans_state_lockdep_release(owner, i)				\
	rwsem_release(&(owner)->btrfs_state_change_map[i], _THIS_IP_)
|
||||
|
||||
/*
 * Initialization of the lockdep map @owner->@lock_map.
 *
 * Each expansion defines its own static lock class key, and the map is named
 * after @lock. @owner is parenthesized so that any pointer expression can be
 * passed.
 */
#define btrfs_lockdep_init_map(owner, lock)					\
	do {									\
		static struct lock_class_key lock##_key;			\
		lockdep_init_map(&(owner)->lock##_map, #lock, &lock##_key, 0);	\
	} while (0)
|
||||
|
||||
/*
 * Initialization of the transaction states lockdep maps.
 *
 * Initializes entry @state of @owner's btrfs_state_change_map[] array, using
 * @lock as the map name and a static lock class key defined per expansion.
 * @owner is parenthesized so that any pointer expression can be passed.
 */
#define btrfs_state_lockdep_init_map(owner, lock, state)			\
	do {									\
		static struct lock_class_key lock##_key;			\
		lockdep_init_map(&(owner)->btrfs_state_change_map[state], #lock, \
				 &lock##_key, 0);				\
	} while (0)
|
||||
|
||||
static_assert(BTRFS_NESTING_MAX <= MAX_LOCKDEP_SUBCLASSES,
|
||||
"too many lock subclasses defined");
|
||||
|
||||
|
|
|
@ -13,8 +13,10 @@
|
|||
#include <linux/bio.h>
|
||||
#include <linux/lzo.h>
|
||||
#include <linux/refcount.h>
|
||||
#include "messages.h"
|
||||
#include "compression.h"
|
||||
#include "ctree.h"
|
||||
#include "super.h"
|
||||
|
||||
#define LZO_LEN 4
|
||||
|
||||
|
@ -425,7 +427,7 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
int lzo_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
int lzo_decompress(struct list_head *ws, const u8 *data_in,
|
||||
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
||||
size_t destlen)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,353 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "discard.h"
|
||||
#include "transaction.h"
|
||||
#include "space-info.h"
|
||||
#include "super.h"
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
|
||||
#define STATE_STRING_PREFACE ": state "
|
||||
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
|
||||
|
||||
/*
|
||||
* Characters to print to indicate error conditions or uncommon filesystem state.
|
||||
* RO is not an error.
|
||||
*/
|
||||
static const char fs_state_chars[] = {
|
||||
[BTRFS_FS_STATE_ERROR] = 'E',
|
||||
[BTRFS_FS_STATE_REMOUNTING] = 'M',
|
||||
[BTRFS_FS_STATE_RO] = 0,
|
||||
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
|
||||
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
|
||||
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
|
||||
[BTRFS_FS_STATE_NO_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
|
||||
};
|
||||
|
||||
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
|
||||
{
|
||||
unsigned int bit;
|
||||
bool states_printed = false;
|
||||
unsigned long fs_state = READ_ONCE(info->fs_state);
|
||||
char *curr = buf;
|
||||
|
||||
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
|
||||
curr += sizeof(STATE_STRING_PREFACE) - 1;
|
||||
|
||||
for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
|
||||
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
|
||||
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
|
||||
*curr++ = fs_state_chars[bit];
|
||||
states_printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* If no states were printed, reset the buffer */
|
||||
if (!states_printed)
|
||||
curr = buf;
|
||||
|
||||
*curr++ = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Generally the error codes correspond to their respective errors, but there
|
||||
* are a few special cases.
|
||||
*
|
||||
* EUCLEAN: Any sort of corruption that we encounter. The tree-checker for
|
||||
* instance will return EUCLEAN if any of the blocks are corrupted in
|
||||
* a way that is problematic. We want to reserve EUCLEAN for these
|
||||
* sort of corruptions.
|
||||
*
|
||||
* EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
|
||||
* need to use EROFS for this case. We will have no idea of the
|
||||
* original failure, that will have been reported at the time we tripped
|
||||
* over the error. Each subsequent error that doesn't have any context
|
||||
* of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
|
||||
*/
|
||||
const char * __attribute_const__ btrfs_decode_error(int errno)
{
	/* Map the negative error code to a short description. */
	switch (errno) {
	case -ENOENT:		/* -2 */
		return "No such entry";
	case -EIO:		/* -5 */
		return "IO failure";
	case -ENOMEM:		/* -12 */
		return "Out of memory";
	case -EEXIST:		/* -17 */
		return "Object already exists";
	case -ENOSPC:		/* -28 */
		return "No space left";
	case -EROFS:		/* -30 */
		return "Readonly filesystem";
	case -EOPNOTSUPP:	/* -95 */
		return "Operation not supported";
	case -EUCLEAN:		/* -117 */
		return "Filesystem corrupted";
	case -EDQUOT:		/* -122 */
		return "Quota exceeded";
	default:
		/* Anything without a dedicated description. */
		return "unknown";
	}
}
|
||||
|
||||
/*
|
||||
* __btrfs_handle_fs_error decodes expected errors from the caller and
|
||||
* invokes the appropriate error response.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
#ifdef CONFIG_PRINTK
|
||||
char statestr[STATE_STRING_BUF_LEN];
|
||||
const char *errstr;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PRINTK_INDEX
|
||||
printk_index_subsys_emit(
|
||||
"BTRFS: error (device %s%s) in %s:%d: errno=%d %s", KERN_CRIT, fmt);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Special case: if the error is EROFS, and we're already under
|
||||
* SB_RDONLY, then it is safe here.
|
||||
*/
|
||||
if (errno == -EROFS && sb_rdonly(sb))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
errstr = btrfs_decode_error(errno);
|
||||
btrfs_state_to_string(fs_info, statestr);
|
||||
if (fmt) {
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr, &vaf);
|
||||
va_end(args);
|
||||
} else {
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Today we only save the error info to memory. Long term we'll also
|
||||
* send it down to the disk.
|
||||
*/
|
||||
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
|
||||
|
||||
/* Don't go through full error handling during mount. */
|
||||
if (!(sb->s_flags & SB_BORN))
|
||||
return;
|
||||
|
||||
if (sb_rdonly(sb))
|
||||
return;
|
||||
|
||||
btrfs_discard_stop(fs_info);
|
||||
|
||||
/* Handle error by forcing the filesystem readonly. */
|
||||
btrfs_set_sb_rdonly(sb);
|
||||
btrfs_info(fs_info, "forced readonly");
|
||||
/*
|
||||
* Note that a running device replace operation is not canceled here
|
||||
* although there is no way to update the progress. It would add the
|
||||
* risk of a deadlock, therefore the canceling is omitted. The only
|
||||
* penalty is that some I/O remains active until the procedure
|
||||
* completes. The next time when the filesystem is mounted writable
|
||||
* again, the device replace operation continues.
|
||||
*/
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
/*
 * Severity names printed after "BTRFS ", indexed by the digit of the printk
 * level header (_btrfs_printk() looks them up with kern_level - '0').
 */
static const char * const logtypes[] = {
	[0] = "emergency",
	[1] = "alert",
	[2] = "critical",
	[3] = "error",
	[4] = "warning",
	[5] = "notice",
	[6] = "info",
	[7] = "debug",
};
|
||||
|
||||
/*
|
||||
* Use one ratelimit state per log level so that a flood of less important
|
||||
* messages doesn't cause more important ones to be dropped.
|
||||
*/
|
||||
static struct ratelimit_state printk_limits[] = {
|
||||
RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
};
|
||||
|
||||
/*
 * Emit a ratelimited message of the form "BTRFS <severity> (device <id><state>): ..."
 * (the device part is dropped when @fs_info is NULL).
 *
 * @fs_info: filesystem the message is about, may be NULL
 * @fmt:     format string, optionally starting with printk level header(s)
 *
 * All leading level headers are consumed from @fmt. A numeric one ('0'..'7')
 * selects the severity name and the per-level ratelimit state and is passed on
 * to _printk(); with none present the "warning" slot (index 4) is used and no
 * header is passed on.
 */
void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char header[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	const char *severity = logtypes[4];
	struct ratelimit_state *rs = &printk_limits[4];
	struct va_format vaf;
	va_list ap;

#ifdef CONFIG_PRINTK_INDEX
	printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt);
#endif

	va_start(ap, fmt);

	/* Peel off every level header; the last numeric one wins. */
	for (;;) {
		const char *rest;
		int level = printk_get_level(fmt);

		if (level == 0)
			break;

		rest = printk_skip_level(fmt);
		if (level >= '0' && level <= '7') {
			const size_t hdr_len = rest - fmt;
			const int idx = level - '0';

			memcpy(header, fmt, hdr_len);
			header[hdr_len] = '\0';
			severity = logtypes[idx];
			rs = &printk_limits[idx];
		}
		fmt = rest;
	}

	vaf.fmt = fmt;
	vaf.va = &ap;

	if (!__ratelimit(rs))
		goto out;

	if (fs_info) {
		char statestr[STATE_STRING_BUF_LEN];

		btrfs_state_to_string(fs_info, statestr);
		_printk("%sBTRFS %s (device %s%s): %pV\n", header, severity,
			fs_info->sb->s_id, statestr, &vaf);
	} else {
		_printk("%sBTRFS %s: %pV\n", header, severity, &vaf);
	}

out:
	va_end(ap);
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BTRFS_ASSERT
|
||||
void __cold btrfs_assertfail(const char *expr, const char *file, int line)
|
||||
{
|
||||
pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
|
||||
void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_err(fs_info,
|
||||
"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
|
||||
}
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
 * Warn that logical addresses are getting close to the 32bit limit.
 *
 * The BTRFS_FS_32BIT_WARN bit in fs_info->flags is set on the first call, and
 * the messages are skipped whenever that bit is found already set.
 */
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
	/* Only the caller that actually flips the bit prints. */
	if (test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags))
		return;

	btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
	btrfs_warn(fs_info,
"due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
		   BTRFS_32BIT_MAX_FILE_SIZE >> 40);
	btrfs_warn(fs_info, "please consider upgrading to 64bit kernel/hardware");
}
|
||||
|
||||
/*
 * Report that the 32bit limit for logical addresses has been reached.
 *
 * The BTRFS_FS_32BIT_ERROR bit in fs_info->flags is set on the first call, and
 * the messages are skipped whenever that bit is found already set.
 */
void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
{
	/* Only the caller that actually flips the bit prints. */
	if (test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags))
		return;

	btrfs_err(fs_info, "reached 32bit limit for logical addresses");
	btrfs_err(fs_info,
"due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
		  BTRFS_32BIT_MAX_FILE_SIZE >> 40);
	btrfs_err(fs_info, "please consider upgrading to 64bit kernel/hardware");
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We only mark the transaction aborted and then set the file system read-only.
|
||||
* This will prevent new transactions from starting or trying to join this
|
||||
* one.
|
||||
*
|
||||
* This means that error recovery at the call site is limited to freeing
|
||||
* any local memory allocations and passing the error code up without
|
||||
* further cleanup. The transaction should complete as it normally would
|
||||
* in the call path but will return -EIO.
|
||||
*
|
||||
* We'll complete the cleanup in btrfs_end_transaction and
|
||||
* btrfs_commit_transaction.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
const char *function,
|
||||
unsigned int line, int errno, bool first_hit)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
WRITE_ONCE(trans->aborted, errno);
|
||||
WRITE_ONCE(trans->transaction->aborted, errno);
|
||||
if (first_hit && errno == -ENOSPC)
|
||||
btrfs_dump_space_info_for_trans_abort(fs_info);
|
||||
/* Wake up anybody who may be waiting on this transaction */
|
||||
wake_up(&fs_info->transaction_wait);
|
||||
wake_up(&fs_info->transaction_blocked_wait);
|
||||
__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* __btrfs_panic decodes unexpected, fatal errors from the caller, issues an
|
||||
* alert, and either panics or BUGs, depending on mount options.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
char *s_id = "<unknown>";
|
||||
const char *errstr;
|
||||
struct va_format vaf = { .fmt = fmt };
|
||||
va_list args;
|
||||
|
||||
if (fs_info)
|
||||
s_id = fs_info->sb->s_id;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.va = &args;
|
||||
|
||||
errstr = btrfs_decode_error(errno);
|
||||
if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
|
||||
panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
|
||||
s_id, function, line, &vaf, errno, errstr);
|
||||
|
||||
btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
|
||||
function, line, &vaf, errno, errstr);
|
||||
va_end(args);
|
||||
/* Caller calls BUG() */
|
||||
}
|
|
@ -0,0 +1,245 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_MESSAGES_H
|
||||
#define BTRFS_MESSAGES_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct btrfs_fs_info;
|
||||
struct btrfs_trans_handle;
|
||||
|
||||
static inline __printf(2, 3) __cold
|
||||
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
|
||||
#define btrfs_printk(fs_info, fmt, args...) \
|
||||
_btrfs_printk(fs_info, fmt, ##args)
|
||||
|
||||
__printf(2, 3)
|
||||
__cold
|
||||
void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
|
||||
|
||||
#else
|
||||
|
||||
#define btrfs_printk(fs_info, fmt, args...) \
|
||||
btrfs_no_printk(fs_info, fmt, ##args)
|
||||
#endif
|
||||
|
||||
#define btrfs_emerg(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use printk_in_rcu
|
||||
*/
|
||||
#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use a ratelimited printk_in_rcu
|
||||
*/
|
||||
#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
/*
|
||||
* Wrappers that use a ratelimited printk
|
||||
*/
|
||||
#define btrfs_emerg_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args)
|
||||
#define btrfs_alert_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args)
|
||||
#define btrfs_crit_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args)
|
||||
#define btrfs_err_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args)
|
||||
#define btrfs_warn_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args)
|
||||
#define btrfs_notice_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args)
|
||||
#define btrfs_info_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args)
|
||||
|
||||
#if defined(CONFIG_DYNAMIC_DEBUG)
|
||||
#define btrfs_debug(fs_info, fmt, args...) \
|
||||
_dynamic_func_call_no_desc(fmt, btrfs_printk, \
|
||||
fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
|
||||
_dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \
|
||||
fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
|
||||
_dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \
|
||||
fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl(fs_info, fmt, args...) \
|
||||
_dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \
|
||||
fs_info, KERN_DEBUG fmt, ##args)
|
||||
#elif defined(DEBUG)
|
||||
#define btrfs_debug(fs_info, fmt, args...) \
|
||||
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl(fs_info, fmt, args...) \
|
||||
btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#else
|
||||
#define btrfs_debug(fs_info, fmt, args...) \
|
||||
btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
|
||||
btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#define btrfs_debug_rl(fs_info, fmt, args...) \
|
||||
btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
|
||||
#endif
|
||||
|
||||
#define btrfs_printk_in_rcu(fs_info, fmt, args...) \
|
||||
do { \
|
||||
rcu_read_lock(); \
|
||||
btrfs_printk(fs_info, fmt, ##args); \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \
|
||||
do { \
|
||||
rcu_read_lock(); \
|
||||
btrfs_no_printk(fs_info, fmt, ##args); \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_printk_ratelimited(fs_info, fmt, args...) \
|
||||
do { \
|
||||
static DEFINE_RATELIMIT_STATE(_rs, \
|
||||
DEFAULT_RATELIMIT_INTERVAL, \
|
||||
DEFAULT_RATELIMIT_BURST); \
|
||||
if (__ratelimit(&_rs)) \
|
||||
btrfs_printk(fs_info, fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \
|
||||
do { \
|
||||
rcu_read_lock(); \
|
||||
btrfs_printk_ratelimited(fs_info, fmt, ##args); \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_BTRFS_ASSERT
|
||||
void __cold btrfs_assertfail(const char *expr, const char *file, int line);
|
||||
|
||||
#define ASSERT(expr) \
|
||||
(likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__))
|
||||
#else
|
||||
#define ASSERT(expr) (void)(expr)
|
||||
#endif
|
||||
|
||||
void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);
|
||||
|
||||
__printf(5, 6)
|
||||
__cold
|
||||
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...);
|
||||
|
||||
const char * __attribute_const__ btrfs_decode_error(int errno);
|
||||
|
||||
__cold
|
||||
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
const char *function,
|
||||
unsigned int line, int errno, bool first_hit);
|
||||
|
||||
bool __cold abort_should_print_stack(int errno);
|
||||
|
||||
/*
 * Call btrfs_abort_transaction as early as possible when an error condition is
 * detected, that way the exact stack trace is reported for some errors.
 *
 * The first abort since mount (tracked by BTRFS_FS_STATE_TRANS_ABORTED in
 * fs_info->fs_state) is reported: through WARN() when
 * abort_should_print_stack() returns true for @errno, through btrfs_err()
 * otherwise. Every call then continues into __btrfs_abort_transaction().
 *
 * The macro-local flag is named __first so that it cannot capture an
 * identifier used in the caller's @trans or @errno expression (a caller
 * variable called "first" would otherwise be silently shadowed).
 *
 * Both arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define btrfs_abort_transaction(trans, errno)			\
do {								\
	bool __first = false;					\
	/* Report first abort since mount */			\
	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,	\
			&((trans)->fs_info->fs_state))) {	\
		__first = true;					\
		if (WARN(abort_should_print_stack((errno)),	\
			KERN_ERR				\
			"BTRFS: Transaction aborted (error %d)\n",	\
			(errno))) {				\
			/* Stack trace printed. */		\
		} else {					\
			btrfs_err((trans)->fs_info,		\
				  "Transaction aborted (error %d)",	\
				  (errno));			\
		}						\
	}							\
	__btrfs_abort_transaction((trans), __func__,		\
				  __LINE__, (errno), __first);	\
} while (0)
|
||||
|
||||
#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \
|
||||
__btrfs_handle_fs_error((fs_info), __func__, __LINE__, \
|
||||
(errno), fmt, ##args)
|
||||
|
||||
__printf(5, 6)
|
||||
__cold
|
||||
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...);
|
||||
/*
|
||||
* If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic
|
||||
* will panic(). Otherwise we BUG() here.
|
||||
*/
|
||||
#define btrfs_panic(fs_info, errno, fmt, args...) \
|
||||
do { \
|
||||
__btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \
|
||||
BUG(); \
|
||||
} while (0)
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
#define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT)
|
||||
/*
|
||||
* The warning threshold is 5/8th of the MAX_LFS_FILESIZE that limits the logical
|
||||
* addresses of extents.
|
||||
*
|
||||
* For 4K page size it's about 10T, for 64K it's 160T.
|
||||
*/
|
||||
#define BTRFS_32BIT_EARLY_WARN_THRESHOLD (BTRFS_32BIT_MAX_FILE_SIZE * 5 / 8)
|
||||
void btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info);
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -10,6 +10,14 @@
|
|||
|
||||
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
|
||||
|
||||
/*
|
||||
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
|
||||
*/
|
||||
#define ENUM_BIT(name) \
|
||||
__ ## name ## _BIT, \
|
||||
name = (1U << __ ## name ## _BIT), \
|
||||
__ ## name ## _SEQ = __ ## name ## _BIT
|
||||
|
||||
static inline void cond_wake_up(struct wait_queue_head *wq)
|
||||
{
|
||||
/*
|
||||
|
@ -32,22 +40,10 @@ static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
|
|||
wake_up(wq);
|
||||
}
|
||||
|
||||
static inline u64 div_factor(u64 num, int factor)
|
||||
static inline u64 mult_perc(u64 num, u32 percent)
|
||||
{
|
||||
if (factor == 10)
|
||||
return num;
|
||||
num *= factor;
|
||||
return div_u64(num, 10);
|
||||
return div_u64(num * percent, 100);
|
||||
}
|
||||
|
||||
static inline u64 div_factor_fine(u64 num, int factor)
|
||||
{
|
||||
if (factor == 100)
|
||||
return num;
|
||||
num *= factor;
|
||||
return div_u64(num, 100);
|
||||
}
|
||||
|
||||
/* Copy of is_power_of_two that is 64bit safe */
|
||||
static inline bool is_power_of_two_u64(u64 n)
|
||||
{
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "messages.h"
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "transaction.h"
|
||||
|
@ -17,6 +18,8 @@
|
|||
#include "delalloc-space.h"
|
||||
#include "qgroup.h"
|
||||
#include "subpage.h"
|
||||
#include "file.h"
|
||||
#include "super.h"
|
||||
|
||||
static struct kmem_cache *btrfs_ordered_extent_cache;
|
||||
|
||||
|
@ -143,7 +146,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Add an ordered extent to the per-inode tree.
|
||||
*
|
||||
* @inode: Inode that this extent is for.
|
||||
|
@ -501,7 +504,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
|
|||
ASSERT(list_empty(&entry->log_list));
|
||||
ASSERT(RB_EMPTY_NODE(&entry->rb_node));
|
||||
if (entry->inode)
|
||||
btrfs_add_delayed_iput(entry->inode);
|
||||
btrfs_add_delayed_iput(BTRFS_I(entry->inode));
|
||||
while (!list_empty(&entry->list)) {
|
||||
cur = entry->list.next;
|
||||
sum = list_entry(cur, struct btrfs_ordered_sum, list);
|
||||
|
@ -1019,17 +1022,18 @@ out:
|
|||
}
|
||||
|
||||
/*
|
||||
* btrfs_flush_ordered_range - Lock the passed range and ensures all pending
|
||||
* ordered extents in it are run to completion.
|
||||
* Lock the passed range and ensures all pending ordered extents in it are run
|
||||
* to completion.
|
||||
*
|
||||
* @inode: Inode whose ordered tree is to be searched
|
||||
* @start: Beginning of range to flush
|
||||
* @end: Last byte of range to lock
|
||||
* @cached_state: If passed, will return the extent state responsible for the
|
||||
* locked range. It's the caller's responsibility to free the cached state.
|
||||
* locked range. It's the caller's responsibility to free the
|
||||
* cached state.
|
||||
*
|
||||
* This function always returns with the given range locked, ensuring after it's
|
||||
* called no order extent can be pending.
|
||||
* Always return with the given range locked, ensuring after it's called no
|
||||
* ordered extent can be pending.
|
||||
*/
|
||||
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
|
@ -1069,11 +1073,12 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
|||
* Return true if btrfs_lock_ordered_range does not return any extents,
|
||||
* otherwise false.
|
||||
*/
|
||||
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
|
||||
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
if (!try_lock_extent(&inode->io_tree, start, end))
|
||||
if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
|
||||
return false;
|
||||
|
||||
ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
|
||||
|
@ -1081,7 +1086,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
|
|||
return true;
|
||||
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent(&inode->io_tree, start, end, NULL);
|
||||
unlock_extent(&inode->io_tree, start, end, cached_state);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -206,7 +206,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
|||
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state);
|
||||
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state);
|
||||
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
|
||||
u64 post);
|
||||
int __init ordered_data_init(void);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "orphan.h"
|
||||
|
||||
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 offset)
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_ORPHAN_H
|
||||
#define BTRFS_ORPHAN_H
|
||||
|
||||
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 offset);
|
||||
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 offset);
|
||||
|
||||
#endif
|
|
@ -3,9 +3,12 @@
|
|||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
#include "accessors.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
struct root_name_map {
|
||||
u64 id;
|
||||
|
@ -240,9 +243,9 @@ void btrfs_print_leaf(struct extent_buffer *l)
|
|||
case BTRFS_DIR_ITEM_KEY:
|
||||
di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
|
||||
btrfs_dir_item_key_to_cpu(l, di, &found_key);
|
||||
pr_info("\t\tdir oid %llu type %u\n",
|
||||
pr_info("\t\tdir oid %llu flags %u\n",
|
||||
found_key.objectid,
|
||||
btrfs_dir_type(l, di));
|
||||
btrfs_dir_flags(l, di));
|
||||
break;
|
||||
case BTRFS_ROOT_ITEM_KEY:
|
||||
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
|
||||
|
@ -384,14 +387,16 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
|
|||
if (!follow)
|
||||
return;
|
||||
for (i = 0; i < nr; i++) {
|
||||
struct btrfs_key first_key;
|
||||
struct btrfs_tree_parent_check check = {
|
||||
.level = level - 1,
|
||||
.transid = btrfs_node_ptr_generation(c, i),
|
||||
.owner_root = btrfs_header_owner(c),
|
||||
.has_first_key = true
|
||||
};
|
||||
struct extent_buffer *next;
|
||||
|
||||
btrfs_node_key_to_cpu(c, &first_key, i);
|
||||
next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
|
||||
btrfs_header_owner(c),
|
||||
btrfs_node_ptr_generation(c, i),
|
||||
level - 1, &first_key);
|
||||
btrfs_node_key_to_cpu(c, &check.first_key, i);
|
||||
next = read_tree_block(fs_info, btrfs_node_blockptr(c, i), &check);
|
||||
if (IS_ERR(next))
|
||||
continue;
|
||||
if (!extent_buffer_uptodate(next)) {
|
||||
|
|
|
@ -4,12 +4,17 @@
|
|||
*/
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
#include "messages.h"
|
||||
#include "props.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "transaction.h"
|
||||
#include "ctree.h"
|
||||
#include "xattr.h"
|
||||
#include "compression.h"
|
||||
#include "space-info.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "super.h"
|
||||
|
||||
#define BTRFS_PROP_HANDLERS_HT_BITS 8
|
||||
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
|
||||
|
@ -453,7 +458,7 @@ int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void __init btrfs_props_init(void)
|
||||
int __init btrfs_props_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -463,5 +468,6 @@ void __init btrfs_props_init(void)
|
|||
|
||||
hash_add(prop_handlers_ht, &p->node, h);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
#include "ctree.h"
|
||||
|
||||
void __init btrfs_props_init(void);
|
||||
int __init btrfs_props_init(void);
|
||||
|
||||
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
const char *name, const char *value, size_t value_len,
|
||||
|
|
|
@ -24,6 +24,11 @@
|
|||
#include "block-group.h"
|
||||
#include "sysfs.h"
|
||||
#include "tree-mod-log.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
/*
|
||||
* Helpers to access qgroup reservation
|
||||
|
@ -1790,8 +1795,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
|
|||
int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_qgroup_extent_record *qrecord)
|
||||
{
|
||||
struct ulist *old_root;
|
||||
u64 bytenr = qrecord->bytenr;
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
int ret;
|
||||
|
||||
/*
|
||||
|
@ -1818,8 +1822,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
|||
if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
|
||||
true);
|
||||
ctx.bytenr = qrecord->bytenr;
|
||||
ctx.fs_info = trans->fs_info;
|
||||
|
||||
ret = btrfs_find_all_roots(&ctx, true);
|
||||
if (ret < 0) {
|
||||
qgroup_mark_inconsistent(trans->fs_info);
|
||||
btrfs_warn(trans->fs_info,
|
||||
|
@ -1835,12 +1841,12 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
|||
*
|
||||
* So modifying qrecord->old_roots is safe here
|
||||
*/
|
||||
qrecord->old_roots = old_root;
|
||||
qrecord->old_roots = ctx.roots;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
u64 num_bytes, gfp_t gfp_flag)
|
||||
u64 num_bytes)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_qgroup_extent_record *record;
|
||||
|
@ -1850,7 +1856,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
|||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
|
||||
|| bytenr == 0 || num_bytes == 0)
|
||||
return 0;
|
||||
record = kzalloc(sizeof(*record), gfp_flag);
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1902,8 +1908,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
|
|||
|
||||
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
|
||||
|
||||
ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
|
||||
GFP_NOFS);
|
||||
ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -2102,12 +2107,11 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
|
|||
* blocks for qgroup accounting.
|
||||
*/
|
||||
ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
|
||||
nodesize, GFP_NOFS);
|
||||
nodesize);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = btrfs_qgroup_trace_extent(trans,
|
||||
dst_path->nodes[dst_level]->start,
|
||||
nodesize, GFP_NOFS);
|
||||
ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start,
|
||||
nodesize);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
|
@ -2336,7 +2340,13 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
if (!extent_buffer_uptodate(root_eb)) {
|
||||
ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL);
|
||||
struct btrfs_tree_parent_check check = {
|
||||
.has_first_key = false,
|
||||
.transid = root_gen,
|
||||
.level = root_level
|
||||
};
|
||||
|
||||
ret = btrfs_read_extent_buffer(root_eb, &check);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
@ -2391,8 +2401,7 @@ walk_down:
|
|||
path->locks[level] = BTRFS_READ_LOCK;
|
||||
|
||||
ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
|
||||
fs_info->nodesize,
|
||||
GFP_NOFS);
|
||||
fs_info->nodesize);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
@ -2749,17 +2758,22 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
|
|||
|
||||
if (!ret && !(fs_info->qgroup_flags &
|
||||
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
|
||||
ctx.bytenr = record->bytenr;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
/*
|
||||
* Old roots should be searched when inserting qgroup
|
||||
* extent record
|
||||
*/
|
||||
if (WARN_ON(!record->old_roots)) {
|
||||
/* Search commit root to find old_roots */
|
||||
ret = btrfs_find_all_roots(NULL, fs_info,
|
||||
record->bytenr, 0,
|
||||
&record->old_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
record->old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
}
|
||||
|
||||
/* Free the reserved data space */
|
||||
|
@ -2772,10 +2786,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
|
|||
* which doesn't lock tree or delayed_refs and search
|
||||
* current root. It's safe inside commit_transaction().
|
||||
*/
|
||||
ret = btrfs_find_all_roots(trans, fs_info,
|
||||
record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
|
||||
ctx.trans = trans;
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
new_roots = ctx.roots;
|
||||
if (qgroup_to_skip) {
|
||||
ulist_del(new_roots, qgroup_to_skip, 0);
|
||||
ulist_del(record->old_roots, qgroup_to_skip,
|
||||
|
@ -3241,7 +3256,6 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *extent_root;
|
||||
struct btrfs_key found;
|
||||
struct extent_buffer *scratch_leaf = NULL;
|
||||
struct ulist *roots = NULL;
|
||||
u64 num_bytes;
|
||||
bool done;
|
||||
int slot;
|
||||
|
@ -3291,6 +3305,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
|
|||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
|
||||
for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
|
||||
btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
|
||||
if (found.type != BTRFS_EXTENT_ITEM_KEY &&
|
||||
found.type != BTRFS_METADATA_ITEM_KEY)
|
||||
|
@ -3300,13 +3316,15 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
|
|||
else
|
||||
num_bytes = found.offset;
|
||||
|
||||
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
|
||||
&roots, false);
|
||||
ctx.bytenr = found.objectid;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/* For rescan, just pass old_roots as NULL */
|
||||
ret = btrfs_qgroup_account_extent(trans, found.objectid,
|
||||
num_bytes, NULL, roots);
|
||||
num_bytes, NULL, ctx.roots);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
@ -4292,6 +4310,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
|||
struct extent_buffer *subvol_eb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
|
||||
struct btrfs_qgroup_swapped_block *block;
|
||||
struct extent_buffer *reloc_eb = NULL;
|
||||
|
@ -4340,10 +4359,13 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
|||
blocks->swapped = swapped;
|
||||
spin_unlock(&blocks->lock);
|
||||
|
||||
check.level = block->level;
|
||||
check.transid = block->reloc_generation;
|
||||
check.has_first_key = true;
|
||||
memcpy(&check.first_key, &block->first_key, sizeof(check.first_key));
|
||||
|
||||
/* Read out reloc subtree root */
|
||||
reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0,
|
||||
block->reloc_generation, block->level,
|
||||
&block->first_key);
|
||||
reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check);
|
||||
if (IS_ERR(reloc_eb)) {
|
||||
ret = PTR_ERR(reloc_eb);
|
||||
reloc_eb = NULL;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/kobject.h>
|
||||
#include "ulist.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "misc.h"
|
||||
|
||||
/*
|
||||
* Btrfs qgroup overview
|
||||
|
@ -242,9 +243,11 @@ static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
|
|||
/*
|
||||
* For qgroup event trace points only
|
||||
*/
|
||||
#define QGROUP_RESERVE (1<<0)
|
||||
#define QGROUP_RELEASE (1<<1)
|
||||
#define QGROUP_FREE (1<<2)
|
||||
enum {
|
||||
ENUM_BIT(QGROUP_RESERVE),
|
||||
ENUM_BIT(QGROUP_RELEASE),
|
||||
ENUM_BIT(QGROUP_FREE),
|
||||
};
|
||||
|
||||
int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
|
||||
|
@ -318,7 +321,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
|||
* (NULL trans)
|
||||
*/
|
||||
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
u64 num_bytes, gfp_t gfp_flag);
|
||||
u64 num_bytes);
|
||||
|
||||
/*
|
||||
* Inform qgroup to trace all leaf items of data
|
||||
|
|
2098
fs/btrfs/raid56.c
2098
fs/btrfs/raid56.c
File diff suppressed because it is too large
Load Diff
|
@ -74,12 +74,6 @@ struct btrfs_raid_bio {
|
|||
/* How many sectors there are for each stripe */
|
||||
u8 stripe_nsectors;
|
||||
|
||||
/* First bad stripe, -1 means no corruption */
|
||||
s8 faila;
|
||||
|
||||
/* Second bad stripe (for RAID6 use) */
|
||||
s8 failb;
|
||||
|
||||
/* Stripe number that we're scrubbing */
|
||||
u8 scrubp;
|
||||
|
||||
|
@ -93,9 +87,7 @@ struct btrfs_raid_bio {
|
|||
|
||||
atomic_t stripes_pending;
|
||||
|
||||
atomic_t error;
|
||||
|
||||
struct work_struct end_io_work;
|
||||
wait_queue_head_t io_wait;
|
||||
|
||||
/* Bitmap to record which horizontal stripe has data */
|
||||
unsigned long dbitmap;
|
||||
|
@ -126,6 +118,29 @@ struct btrfs_raid_bio {
|
|||
|
||||
/* Allocated with real_stripes-many pointers for finish_*() calls */
|
||||
void **finish_pointers;
|
||||
|
||||
/*
|
||||
* The bitmap recording where IO errors happened.
|
||||
* Each bit corresponds to one sector in either bio_sectors[] or
|
||||
* stripe_sectors[] array.
|
||||
*
|
||||
* The reason we don't use another bit in sector_ptr is, we have two
|
||||
* arrays of sectors, and a lot of IO can use sectors in both arrays.
|
||||
* Thus making it much harder to iterate.
|
||||
*/
|
||||
unsigned long *error_bitmap;
|
||||
|
||||
/*
|
||||
* Checksum buffer if the rbio is for data. The buffer should cover
|
||||
* all data sectors (excluding P/Q sectors).
|
||||
*/
|
||||
u8 *csum_buf;
|
||||
|
||||
/*
|
||||
* Each bit represents if the corresponding sector has data csum found.
|
||||
* Should only cover data sectors (excluding P/Q sectors).
|
||||
*/
|
||||
unsigned long *csum_bitmap;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
|
|||
(len * sizeof(char)), mask);
|
||||
if (!ret)
|
||||
return ret;
|
||||
strncpy(ret->str, src, len);
|
||||
/* Warn if the source got unexpectedly truncated. */
|
||||
if (WARN_ON(strscpy(ret->str, src, len) < 0)) {
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,11 +5,14 @@
|
|||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "locking.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "ref-verify.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
|
||||
/*
|
||||
* Used to keep track of the roots and number of refs each root has for a given
|
||||
|
|
|
@ -2,13 +2,19 @@
|
|||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/iversion.h>
|
||||
#include "compression.h"
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "compression.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "disk-io.h"
|
||||
#include "reflink.h"
|
||||
#include "transaction.h"
|
||||
#include "subpage.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
#include "file.h"
|
||||
#include "super.h"
|
||||
|
||||
#define BTRFS_MAX_DEDUPE_LEN SZ_16M
|
||||
|
||||
|
@ -318,8 +324,8 @@ copy_to_page:
|
|||
goto out;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_clone() - clone a range from inode file to another
|
||||
/*
|
||||
* Clone a range from inode file to another.
|
||||
*
|
||||
* @src: Inode to clone from
|
||||
* @inode: Inode to clone to
|
||||
|
@ -887,7 +893,7 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
|
|||
return -EINVAL;
|
||||
|
||||
if (same_inode) {
|
||||
btrfs_inode_lock(src_inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_lock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP);
|
||||
} else {
|
||||
lock_two_nondirectories(src_inode, dst_inode);
|
||||
btrfs_double_mmap_lock(src_inode, dst_inode);
|
||||
|
@ -905,7 +911,7 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
|
|||
|
||||
out_unlock:
|
||||
if (same_inode) {
|
||||
btrfs_inode_unlock(src_inode, BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(BTRFS_I(src_inode), BTRFS_ILOCK_MMAP);
|
||||
} else {
|
||||
btrfs_double_mmap_unlock(src_inode, dst_inode);
|
||||
unlock_two_nondirectories(src_inode, dst_inode);
|
||||
|
|
|
@ -27,6 +27,15 @@
|
|||
#include "subpage.h"
|
||||
#include "zoned.h"
|
||||
#include "inode-item.h"
|
||||
#include "space-info.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
#include "file-item.h"
|
||||
#include "relocation.h"
|
||||
#include "super.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
/*
|
||||
* Relocation overview
|
||||
|
@ -470,7 +479,7 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
|
|||
int ret;
|
||||
int err = 0;
|
||||
|
||||
iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
|
||||
iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info);
|
||||
if (!iter)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
path = btrfs_alloc_path();
|
||||
|
@ -1109,10 +1118,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
inode = find_next_inode(root, key.objectid);
|
||||
first = 0;
|
||||
} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
|
||||
btrfs_add_delayed_iput(inode);
|
||||
btrfs_add_delayed_iput(BTRFS_I(inode));
|
||||
inode = find_next_inode(root, key.objectid);
|
||||
}
|
||||
if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
end = key.offset +
|
||||
btrfs_file_extent_num_bytes(leaf, fi);
|
||||
WARN_ON(!IS_ALIGNED(key.offset,
|
||||
|
@ -1120,14 +1131,15 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
|
||||
end--;
|
||||
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
|
||||
key.offset, end);
|
||||
key.offset, end,
|
||||
&cached_state);
|
||||
if (!ret)
|
||||
continue;
|
||||
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode),
|
||||
key.offset, end, true);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree,
|
||||
key.offset, end, NULL);
|
||||
key.offset, end, &cached_state);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1170,7 +1182,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
|||
if (dirty)
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
if (inode)
|
||||
btrfs_add_delayed_iput(inode);
|
||||
btrfs_add_delayed_iput(BTRFS_I(inode));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1516,6 +1528,8 @@ static int invalidate_extent_cache(struct btrfs_root *root,
|
|||
|
||||
objectid = min_key->objectid;
|
||||
while (1) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
cond_resched();
|
||||
iput(inode);
|
||||
|
||||
|
@ -1566,9 +1580,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
|
|||
}
|
||||
|
||||
/* the lock_extent waits for read_folio to complete */
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -2597,10 +2611,14 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
|
|||
static int get_tree_block_key(struct btrfs_fs_info *fs_info,
|
||||
struct tree_block *block)
|
||||
{
|
||||
struct btrfs_tree_parent_check check = {
|
||||
.level = block->level,
|
||||
.owner_root = block->owner,
|
||||
.transid = block->key.offset
|
||||
};
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = read_tree_block(fs_info, block->bytenr, block->owner,
|
||||
block->key.offset, block->level, NULL);
|
||||
eb = read_tree_block(fs_info, block->bytenr, &check);
|
||||
if (IS_ERR(eb))
|
||||
return PTR_ERR(eb);
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
|
@ -2861,25 +2879,27 @@ static noinline_for_stack int prealloc_file_extent_cluster(
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
btrfs_inode_lock(&inode->vfs_inode, 0);
|
||||
btrfs_inode_lock(inode, 0);
|
||||
for (nr = 0; nr < cluster->nr; nr++) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
start = cluster->boundary[nr] - offset;
|
||||
if (nr + 1 < cluster->nr)
|
||||
end = cluster->boundary[nr + 1] - 1 - offset;
|
||||
else
|
||||
end = cluster->end - offset;
|
||||
|
||||
lock_extent(&inode->io_tree, start, end, NULL);
|
||||
lock_extent(&inode->io_tree, start, end, &cached_state);
|
||||
num_bytes = end + 1 - start;
|
||||
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
|
||||
num_bytes, num_bytes,
|
||||
end + 1, &alloc_hint);
|
||||
cur_offset = end + 1;
|
||||
unlock_extent(&inode->io_tree, start, end, NULL);
|
||||
unlock_extent(&inode->io_tree, start, end, &cached_state);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
btrfs_inode_unlock(&inode->vfs_inode, 0);
|
||||
btrfs_inode_unlock(inode, 0);
|
||||
|
||||
if (cur_offset < prealloc_end)
|
||||
btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
|
||||
|
@ -2891,6 +2911,7 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
|
|||
u64 start, u64 end, u64 block_start)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct extent_state *cached_state = NULL;
|
||||
int ret = 0;
|
||||
|
||||
em = alloc_extent_map();
|
||||
|
@ -2903,9 +2924,9 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
|
|||
em->block_start = block_start;
|
||||
set_bit(EXTENT_FLAG_PINNED, &em->flags);
|
||||
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
free_extent_map(em);
|
||||
|
||||
return ret;
|
||||
|
@ -2983,6 +3004,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
|
|||
*/
|
||||
cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
|
||||
while (cur <= page_end) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 extent_start = cluster->boundary[*cluster_nr] - offset;
|
||||
u64 extent_end = get_cluster_boundary_end(cluster,
|
||||
*cluster_nr) - offset;
|
||||
|
@ -2998,13 +3020,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
|
|||
goto release_page;
|
||||
|
||||
/* Mark the range delalloc and dirty for later writeback */
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
|
||||
&cached_state);
|
||||
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
|
||||
clamped_end, 0, NULL);
|
||||
clamped_end, 0, &cached_state);
|
||||
if (ret) {
|
||||
clear_extent_bits(&BTRFS_I(inode)->io_tree,
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree,
|
||||
clamped_start, clamped_end,
|
||||
EXTENT_LOCKED | EXTENT_BOUNDARY);
|
||||
EXTENT_LOCKED | EXTENT_BOUNDARY,
|
||||
&cached_state);
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
clamped_len, true);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
||||
|
@ -3031,7 +3055,8 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
|
|||
boundary_start, boundary_end,
|
||||
EXTENT_BOUNDARY);
|
||||
}
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
|
||||
&cached_state);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
|
||||
cur += clamped_len;
|
||||
|
||||
|
@ -3388,24 +3413,28 @@ int add_data_references(struct reloc_control *rc,
|
|||
struct btrfs_path *path,
|
||||
struct rb_root *blocks)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
|
||||
struct ulist *leaves = NULL;
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
struct ulist_iterator leaf_uiter;
|
||||
struct ulist_node *ref_node = NULL;
|
||||
const u32 blocksize = fs_info->nodesize;
|
||||
const u32 blocksize = rc->extent_root->fs_info->nodesize;
|
||||
int ret = 0;
|
||||
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
|
||||
0, &leaves, NULL, true);
|
||||
|
||||
ctx.bytenr = extent_key->objectid;
|
||||
ctx.ignore_extent_item_pos = true;
|
||||
ctx.fs_info = rc->extent_root->fs_info;
|
||||
|
||||
ret = btrfs_find_all_leafs(&ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ULIST_ITER_INIT(&leaf_uiter);
|
||||
while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
|
||||
while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) {
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
|
||||
eb = read_tree_block(ctx.fs_info, ref_node->val, &check);
|
||||
if (IS_ERR(eb)) {
|
||||
ret = PTR_ERR(eb);
|
||||
break;
|
||||
|
@ -3421,7 +3450,7 @@ int add_data_references(struct reloc_control *rc,
|
|||
}
|
||||
if (ret < 0)
|
||||
free_block_list(blocks);
|
||||
ulist_free(leaves);
|
||||
ulist_free(ctx.refs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3905,8 +3934,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
|
|||
INIT_LIST_HEAD(&rc->dirty_subvol_roots);
|
||||
btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
|
||||
mapping_tree_init(&rc->reloc_root_tree);
|
||||
extent_io_tree_init(fs_info, &rc->processed_blocks,
|
||||
IO_TREE_RELOC_BLOCKS, NULL);
|
||||
extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -4330,7 +4358,7 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
|
|||
|
||||
disk_bytenr = file_pos + inode->index_cnt;
|
||||
csum_root = btrfs_csum_root(fs_info, disk_bytenr);
|
||||
ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
|
||||
ret = btrfs_lookup_csums_list(csum_root, disk_bytenr,
|
||||
disk_bytenr + len - 1, &list, 0, false);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_RELOCATION_H
|
||||
#define BTRFS_RELOCATION_H
|
||||
|
||||
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
|
||||
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root);
|
||||
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
|
||||
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *buf,
|
||||
struct extent_buffer *cow);
|
||||
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
|
||||
u64 *bytes_to_reserve);
|
||||
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_pending_snapshot *pending);
|
||||
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
int btrfs_should_ignore_reloc_root(struct btrfs_root *root);
|
||||
|
||||
#endif
|
|
@ -6,11 +6,16 @@
|
|||
#include <linux/err.h>
|
||||
#include <linux/uuid.h>
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
|
||||
#include "transaction.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
#include "qgroup.h"
|
||||
#include "space-info.h"
|
||||
#include "accessors.h"
|
||||
#include "root-tree.h"
|
||||
#include "orphan.h"
|
||||
|
||||
/*
|
||||
* Read a root item from the tree. In case we detect a root item smaller than
|
||||
|
@ -327,9 +332,8 @@ out:
|
|||
}
|
||||
|
||||
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
|
||||
u64 ref_id, u64 dirid, u64 *sequence, const char *name,
|
||||
int name_len)
|
||||
|
||||
u64 ref_id, u64 dirid, u64 *sequence,
|
||||
const struct fscrypt_str *name)
|
||||
{
|
||||
struct btrfs_root *tree_root = trans->fs_info->tree_root;
|
||||
struct btrfs_path *path;
|
||||
|
@ -356,8 +360,8 @@ again:
|
|||
struct btrfs_root_ref);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
|
||||
(btrfs_root_ref_name_len(leaf, ref) != name_len) ||
|
||||
memcmp_extent_buffer(leaf, name, ptr, name_len)) {
|
||||
(btrfs_root_ref_name_len(leaf, ref) != name->len) ||
|
||||
memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
@ -400,8 +404,8 @@ out:
|
|||
* Will return 0, -ENOMEM, or anything from the CoW path
|
||||
*/
|
||||
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
|
||||
u64 ref_id, u64 dirid, u64 sequence, const char *name,
|
||||
int name_len)
|
||||
u64 ref_id, u64 dirid, u64 sequence,
|
||||
const struct fscrypt_str *name)
|
||||
{
|
||||
struct btrfs_root *tree_root = trans->fs_info->tree_root;
|
||||
struct btrfs_key key;
|
||||
|
@ -420,7 +424,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
|
|||
key.offset = ref_id;
|
||||
again:
|
||||
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
|
||||
sizeof(*ref) + name_len);
|
||||
sizeof(*ref) + name->len);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_free_path(path);
|
||||
|
@ -431,9 +435,9 @@ again:
|
|||
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
|
||||
btrfs_set_root_ref_dirid(leaf, ref, dirid);
|
||||
btrfs_set_root_ref_sequence(leaf, ref, sequence);
|
||||
btrfs_set_root_ref_name_len(leaf, ref, name_len);
|
||||
btrfs_set_root_ref_name_len(leaf, ref, name->len);
|
||||
ptr = (unsigned long)(ref + 1);
|
||||
write_extent_buffer(leaf, name, ptr, name_len);
|
||||
write_extent_buffer(leaf, name->name, ptr, name->len);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_ROOT_TREE_H
|
||||
#define BTRFS_ROOT_TREE_H
|
||||
|
||||
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
int nitems, bool use_global_rsv);
|
||||
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
|
||||
u64 ref_id, u64 dirid, u64 sequence,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
|
||||
u64 ref_id, u64 dirid, u64 *sequence,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_del_root(struct btrfs_trans_handle *trans, const struct btrfs_key *key);
|
||||
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
const struct btrfs_key *key,
|
||||
struct btrfs_root_item *item);
|
||||
int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_key *key,
|
||||
struct btrfs_root_item *item);
|
||||
int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
|
||||
struct btrfs_path *path, struct btrfs_root_item *root_item,
|
||||
struct btrfs_key *root_key);
|
||||
int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_set_root_node(struct btrfs_root_item *item,
|
||||
struct extent_buffer *node);
|
||||
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
|
||||
void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root);
|
||||
|
||||
#endif
|
|
@ -17,10 +17,13 @@
|
|||
#include "extent_io.h"
|
||||
#include "dev-replace.h"
|
||||
#include "check-integrity.h"
|
||||
#include "rcu-string.h"
|
||||
#include "raid56.h"
|
||||
#include "block-group.h"
|
||||
#include "zoned.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
#include "scrub.h"
|
||||
|
||||
/*
|
||||
* This is only the first step towards a full-featured scrub. It reads all
|
||||
|
@ -56,6 +59,17 @@ struct scrub_ctx;
|
|||
|
||||
#define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE))
|
||||
|
||||
/*
|
||||
* Maximum number of mirrors that can be available for all profiles counting
|
||||
* the target device of dev-replace as one. During an active device replace
|
||||
* procedure, the target device of the copy operation is a mirror for the
|
||||
* filesystem data as well that can be used to read data in order to repair
|
||||
* read errors on other disks.
|
||||
*
|
||||
* Current value is derived from RAID1C4 with 4 copies.
|
||||
*/
|
||||
#define BTRFS_MAX_MIRRORS (4 + 1)
|
||||
|
||||
struct scrub_recover {
|
||||
refcount_t refs;
|
||||
struct btrfs_io_context *bioc;
|
||||
|
@ -284,7 +298,7 @@ static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
|
|||
* Will also allocate new pages for @sblock if needed.
|
||||
*/
|
||||
static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
|
||||
u64 logical, gfp_t gfp)
|
||||
u64 logical)
|
||||
{
|
||||
const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT;
|
||||
struct scrub_sector *ssector;
|
||||
|
@ -292,7 +306,7 @@ static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
|
|||
/* We must never have scrub_block exceed U32_MAX in size. */
|
||||
ASSERT(logical - sblock->logical < U32_MAX);
|
||||
|
||||
ssector = kzalloc(sizeof(*ssector), gfp);
|
||||
ssector = kzalloc(sizeof(*ssector), GFP_KERNEL);
|
||||
if (!ssector)
|
||||
return NULL;
|
||||
|
||||
|
@ -300,7 +314,7 @@ static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
|
|||
if (!sblock->pages[page_index]) {
|
||||
int ret;
|
||||
|
||||
sblock->pages[page_index] = alloc_page(gfp);
|
||||
sblock->pages[page_index] = alloc_page(GFP_KERNEL);
|
||||
if (!sblock->pages[page_index]) {
|
||||
kfree(ssector);
|
||||
return NULL;
|
||||
|
@ -794,8 +808,8 @@ nomem:
|
|||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
void *warn_ctx)
|
||||
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
|
||||
u64 root, void *warn_ctx)
|
||||
{
|
||||
u32 nlink;
|
||||
int ret;
|
||||
|
@ -862,7 +876,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
|||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
|
||||
swarn->errstr, swarn->logical,
|
||||
rcu_str_deref(swarn->dev->name),
|
||||
btrfs_dev_name(swarn->dev),
|
||||
swarn->physical,
|
||||
root, inum, offset,
|
||||
fs_info->sectorsize, nlink,
|
||||
|
@ -876,7 +890,7 @@ err:
|
|||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
|
||||
swarn->errstr, swarn->logical,
|
||||
rcu_str_deref(swarn->dev->name),
|
||||
btrfs_dev_name(swarn->dev),
|
||||
swarn->physical,
|
||||
root, inum, offset, ret);
|
||||
|
||||
|
@ -894,7 +908,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
|||
struct btrfs_extent_item *ei;
|
||||
struct scrub_warning swarn;
|
||||
unsigned long ptr = 0;
|
||||
u64 extent_item_pos;
|
||||
u64 flags = 0;
|
||||
u64 ref_root;
|
||||
u32 item_size;
|
||||
|
@ -908,8 +921,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
|||
/* Super block error, no need to search extent tree. */
|
||||
if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
|
||||
btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
|
||||
errstr, rcu_str_deref(dev->name),
|
||||
sblock->physical);
|
||||
errstr, btrfs_dev_name(dev), sblock->physical);
|
||||
return;
|
||||
}
|
||||
path = btrfs_alloc_path();
|
||||
|
@ -926,7 +938,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
|||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
extent_item_pos = swarn.logical - found_key.objectid;
|
||||
swarn.extent_item_size = found_key.offset;
|
||||
|
||||
eb = path->nodes[0];
|
||||
|
@ -941,7 +952,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
|||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
|
||||
errstr, swarn.logical,
|
||||
rcu_str_deref(dev->name),
|
||||
btrfs_dev_name(dev),
|
||||
swarn.physical,
|
||||
ref_level ? "node" : "leaf",
|
||||
ret < 0 ? -1 : ref_level,
|
||||
|
@ -949,12 +960,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
|||
} while (ret != 1);
|
||||
btrfs_release_path(path);
|
||||
} else {
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
ctx.bytenr = found_key.objectid;
|
||||
ctx.extent_item_pos = swarn.logical - found_key.objectid;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
swarn.path = path;
|
||||
swarn.dev = dev;
|
||||
iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, 1,
|
||||
scrub_print_warning_inode, &swarn, false);
|
||||
|
||||
iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn);
|
||||
}
|
||||
|
||||
out:
|
||||
|
@ -1358,7 +1375,7 @@ corrected_error:
|
|||
spin_unlock(&sctx->stat_lock);
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"fixed up error at logical %llu on dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
logical, btrfs_dev_name(dev));
|
||||
}
|
||||
} else {
|
||||
did_not_correct_error:
|
||||
|
@ -1367,7 +1384,7 @@ did_not_correct_error:
|
|||
spin_unlock(&sctx->stat_lock);
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"unable to fixup (regular) error at logical %llu on dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
logical, btrfs_dev_name(dev));
|
||||
}
|
||||
|
||||
out:
|
||||
|
@ -1480,7 +1497,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
|||
return -EIO;
|
||||
}
|
||||
|
||||
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
|
||||
recover = kzalloc(sizeof(struct scrub_recover), GFP_KERNEL);
|
||||
if (!recover) {
|
||||
btrfs_put_bioc(bioc);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
|
@ -1503,7 +1520,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
|||
sblock = sblocks_for_recheck[mirror_index];
|
||||
sblock->sctx = sctx;
|
||||
|
||||
sector = alloc_scrub_sector(sblock, logical, GFP_NOFS);
|
||||
sector = alloc_scrub_sector(sblock, logical);
|
||||
if (!sector) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
|
@ -2313,14 +2330,14 @@ static void scrub_missing_raid56_worker(struct work_struct *work)
|
|||
spin_unlock(&sctx->stat_lock);
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"IO error rebuilding logical %llu for dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
logical, btrfs_dev_name(dev));
|
||||
} else if (sblock->header_error || sblock->checksum_error) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.uncorrectable_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
btrfs_err_rl_in_rcu(fs_info,
|
||||
"failed to rebuild valid logical %llu for dev %s",
|
||||
logical, rcu_str_deref(dev->name));
|
||||
logical, btrfs_dev_name(dev));
|
||||
} else {
|
||||
scrub_write_block_to_dev_replace(sblock);
|
||||
}
|
||||
|
@ -2425,7 +2442,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
|
|||
*/
|
||||
u32 l = min(sectorsize, len);
|
||||
|
||||
sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
|
||||
sector = alloc_scrub_sector(sblock, logical);
|
||||
if (!sector) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
|
@ -2756,7 +2773,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
|
|||
for (index = 0; len > 0; index++) {
|
||||
struct scrub_sector *sector;
|
||||
|
||||
sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
|
||||
sector = alloc_scrub_sector(sblock, logical);
|
||||
if (!sector) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
|
@ -3221,7 +3238,7 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
|
|||
extent_dev = bioc->stripes[0].dev;
|
||||
btrfs_put_bioc(bioc);
|
||||
|
||||
ret = btrfs_lookup_csums_range(csum_root, extent_start,
|
||||
ret = btrfs_lookup_csums_list(csum_root, extent_start,
|
||||
extent_start + extent_size - 1,
|
||||
&sctx->csum_list, 1, false);
|
||||
if (ret) {
|
||||
|
@ -3447,7 +3464,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
|
|||
cur_logical;
|
||||
|
||||
if (extent_flags & BTRFS_EXTENT_FLAG_DATA) {
|
||||
ret = btrfs_lookup_csums_range(csum_root, cur_logical,
|
||||
ret = btrfs_lookup_csums_list(csum_root, cur_logical,
|
||||
cur_logical + scrub_len - 1,
|
||||
&sctx->csum_list, 1, false);
|
||||
if (ret)
|
||||
|
@ -4284,7 +4301,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
|||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
btrfs_err_in_rcu(fs_info,
|
||||
"scrub on devid %llu: filesystem on %s is not writable",
|
||||
devid, rcu_str_deref(dev->name));
|
||||
devid, btrfs_dev_name(dev));
|
||||
ret = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_SCRUB_H
|
||||
#define BTRFS_SCRUB_H
|
||||
|
||||
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
u64 end, struct btrfs_scrub_progress *progress,
|
||||
int readonly, int is_dev_replace);
|
||||
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
|
||||
int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
|
||||
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
struct btrfs_scrub_progress *progress);
|
||||
|
||||
#endif
|
480
fs/btrfs/send.c
480
fs/btrfs/send.c
|
@ -27,6 +27,11 @@
|
|||
#include "compression.h"
|
||||
#include "xattr.h"
|
||||
#include "print-tree.h"
|
||||
#include "accessors.h"
|
||||
#include "dir-item.h"
|
||||
#include "file-item.h"
|
||||
#include "ioctl.h"
|
||||
#include "verity.h"
|
||||
|
||||
/*
|
||||
* Maximum number of references an extent can have in order for us to attempt to
|
||||
|
@ -34,7 +39,7 @@
|
|||
* avoid hitting limitations of the backreference walking code (taking a lot of
|
||||
* time and using too much memory for extents with large number of references).
|
||||
*/
|
||||
#define SEND_MAX_EXTENT_REFS 64
|
||||
#define SEND_MAX_EXTENT_REFS 1024
|
||||
|
||||
/*
|
||||
* A fs_path is a helper to dynamically build path names with unknown size.
|
||||
|
@ -71,13 +76,46 @@ struct clone_root {
|
|||
struct btrfs_root *root;
|
||||
u64 ino;
|
||||
u64 offset;
|
||||
|
||||
u64 found_refs;
|
||||
u64 num_bytes;
|
||||
bool found_ref;
|
||||
};
|
||||
|
||||
#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
|
||||
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
|
||||
|
||||
/*
|
||||
* Limit the root_ids array of struct backref_cache_entry to 12 elements.
|
||||
* This makes the size of a cache entry exactly 128 bytes on x86_64.
|
||||
* The most common case is to have a single root for cloning, which corresponds
|
||||
* to the send root. Having the user specify more than 11 clone roots is not
|
||||
* common, and in such rare cases we simply don't use caching if the number of
|
||||
* cloning roots that lead down to a leaf is more than 12.
|
||||
*/
|
||||
#define SEND_MAX_BACKREF_CACHE_ROOTS 12
|
||||
|
||||
/*
|
||||
* Max number of entries in the cache.
|
||||
* With SEND_MAX_BACKREF_CACHE_ROOTS as 12, the size in bytes, excluding
|
||||
* maple tree's internal nodes, is 16K.
|
||||
*/
|
||||
#define SEND_MAX_BACKREF_CACHE_SIZE 128
|
||||
|
||||
/*
|
||||
* A backref cache entry maps a leaf to a list of IDs of roots from which the
|
||||
* leaf is accessible and we can use for clone operations.
|
||||
* With SEND_MAX_BACKREF_CACHE_ROOTS as 12, each cache entry is 128 bytes (on
|
||||
* x86_64).
|
||||
*/
|
||||
struct backref_cache_entry {
|
||||
/* List to link to the cache's lru list. */
|
||||
struct list_head list;
|
||||
/* The key for this entry in the cache. */
|
||||
u64 key;
|
||||
u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
|
||||
/* Number of valid elements in the root_ids array. */
|
||||
int num_roots;
|
||||
};
|
||||
|
||||
struct send_ctx {
|
||||
struct file *send_filp;
|
||||
loff_t send_off;
|
||||
|
@ -246,6 +284,14 @@ struct send_ctx {
|
|||
|
||||
struct rb_root rbtree_new_refs;
|
||||
struct rb_root rbtree_deleted_refs;
|
||||
|
||||
struct {
|
||||
u64 last_reloc_trans;
|
||||
struct list_head lru_list;
|
||||
struct maple_tree entries;
|
||||
/* Number of entries stored in the cache. */
|
||||
int size;
|
||||
} backref_cache;
|
||||
};
|
||||
|
||||
struct pending_dir_move {
|
||||
|
@ -1093,7 +1139,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
|
|||
data_len = btrfs_dir_data_len(eb, di);
|
||||
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
|
||||
|
||||
if (btrfs_dir_type(eb, di) == BTRFS_FT_XATTR) {
|
||||
if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
|
||||
if (name_len > XATTR_NAME_MAX) {
|
||||
ret = -ENAMETOOLONG;
|
||||
goto out;
|
||||
|
@ -1236,8 +1282,12 @@ struct backref_ctx {
|
|||
/* may be truncated in case it's the last extent in a file */
|
||||
u64 extent_len;
|
||||
|
||||
/* Just to check for bugs in backref resolving */
|
||||
int found_itself;
|
||||
/* The bytenr the file extent item we are processing refers to. */
|
||||
u64 bytenr;
|
||||
/* The owner (root id) of the data backref for the current extent. */
|
||||
u64 backref_owner;
|
||||
/* The offset of the data backref for the current extent. */
|
||||
u64 backref_offset;
|
||||
};
|
||||
|
||||
static int __clone_root_cmp_bsearch(const void *key, const void *elt)
|
||||
|
@ -1266,32 +1316,33 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2)
|
|||
|
||||
/*
|
||||
* Called for every backref that is found for the current extent.
|
||||
* Results are collected in sctx->clone_roots->ino/offset/found_refs
|
||||
* Results are collected in sctx->clone_roots->ino/offset.
|
||||
*/
|
||||
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
|
||||
static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
|
||||
void *ctx_)
|
||||
{
|
||||
struct backref_ctx *bctx = ctx_;
|
||||
struct clone_root *found;
|
||||
struct clone_root *clone_root;
|
||||
|
||||
/* First check if the root is in the list of accepted clone sources */
|
||||
found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
|
||||
clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
|
||||
bctx->sctx->clone_roots_cnt,
|
||||
sizeof(struct clone_root),
|
||||
__clone_root_cmp_bsearch);
|
||||
if (!found)
|
||||
if (!clone_root)
|
||||
return 0;
|
||||
|
||||
if (found->root == bctx->sctx->send_root &&
|
||||
/* This is our own reference, bail out as we can't clone from it. */
|
||||
if (clone_root->root == bctx->sctx->send_root &&
|
||||
ino == bctx->cur_objectid &&
|
||||
offset == bctx->cur_offset) {
|
||||
bctx->found_itself = 1;
|
||||
}
|
||||
offset == bctx->cur_offset)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Make sure we don't consider clones from send_root that are
|
||||
* behind the current inode/offset.
|
||||
*/
|
||||
if (found->root == bctx->sctx->send_root) {
|
||||
if (clone_root->root == bctx->sctx->send_root) {
|
||||
/*
|
||||
* If the source inode was not yet processed we can't issue a
|
||||
* clone operation, as the source extent does not exist yet at
|
||||
|
@ -1312,21 +1363,217 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
|
|||
}
|
||||
|
||||
bctx->found++;
|
||||
found->found_refs++;
|
||||
if (ino < found->ino) {
|
||||
found->ino = ino;
|
||||
found->offset = offset;
|
||||
} else if (found->ino == ino) {
|
||||
clone_root->found_ref = true;
|
||||
|
||||
/*
|
||||
* same extent found more then once in the same file.
|
||||
* If the given backref refers to a file extent item with a larger
|
||||
* number of bytes than what we found before, use the new one so that
|
||||
* we clone more optimally and end up doing less writes and getting
|
||||
* less exclusive, non-shared extents at the destination.
|
||||
*/
|
||||
if (found->offset > offset + bctx->extent_len)
|
||||
found->offset = offset;
|
||||
if (num_bytes > clone_root->num_bytes) {
|
||||
clone_root->ino = ino;
|
||||
clone_root->offset = offset;
|
||||
clone_root->num_bytes = num_bytes;
|
||||
|
||||
/*
|
||||
* Found a perfect candidate, so there's no need to continue
|
||||
* backref walking.
|
||||
*/
|
||||
if (num_bytes >= bctx->extent_len)
|
||||
return BTRFS_ITERATE_EXTENT_INODES_STOP;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void empty_backref_cache(struct send_ctx *sctx)
|
||||
{
|
||||
struct backref_cache_entry *entry;
|
||||
struct backref_cache_entry *tmp;
|
||||
|
||||
list_for_each_entry_safe(entry, tmp, &sctx->backref_cache.lru_list, list)
|
||||
kfree(entry);
|
||||
|
||||
INIT_LIST_HEAD(&sctx->backref_cache.lru_list);
|
||||
mtree_destroy(&sctx->backref_cache.entries);
|
||||
sctx->backref_cache.size = 0;
|
||||
}
|
||||
|
||||
static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
|
||||
const u64 **root_ids_ret, int *root_count_ret)
|
||||
{
|
||||
struct backref_ctx *bctx = ctx;
|
||||
struct send_ctx *sctx = bctx->sctx;
|
||||
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
|
||||
const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
|
||||
struct backref_cache_entry *entry;
|
||||
|
||||
if (sctx->backref_cache.size == 0)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If relocation happened since we first filled the cache, then we must
|
||||
* empty the cache and can not use it, because even though we operate on
|
||||
* read-only roots, their leaves and nodes may have been reallocated and
|
||||
* now be used for different nodes/leaves of the same tree or some other
|
||||
* tree.
|
||||
*
|
||||
* We are called from iterate_extent_inodes() while either holding a
|
||||
* transaction handle or holding fs_info->commit_root_sem, so no need
|
||||
* to take any lock here.
|
||||
*/
|
||||
if (fs_info->last_reloc_trans > sctx->backref_cache.last_reloc_trans) {
|
||||
empty_backref_cache(sctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
entry = mtree_load(&sctx->backref_cache.entries, key);
|
||||
if (!entry)
|
||||
return false;
|
||||
|
||||
*root_ids_ret = entry->root_ids;
|
||||
*root_count_ret = entry->num_roots;
|
||||
list_move_tail(&entry->list, &sctx->backref_cache.lru_list);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
|
||||
void *ctx)
|
||||
{
|
||||
struct backref_ctx *bctx = ctx;
|
||||
struct send_ctx *sctx = bctx->sctx;
|
||||
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
|
||||
struct backref_cache_entry *new_entry;
|
||||
struct ulist_iterator uiter;
|
||||
struct ulist_node *node;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We're called while holding a transaction handle or while holding
|
||||
* fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a
|
||||
* NOFS allocation.
|
||||
*/
|
||||
new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS);
|
||||
/* No worries, cache is optional. */
|
||||
if (!new_entry)
|
||||
return;
|
||||
|
||||
new_entry->key = leaf_bytenr >> fs_info->sectorsize_bits;
|
||||
new_entry->num_roots = 0;
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while ((node = ulist_next(root_ids, &uiter)) != NULL) {
|
||||
const u64 root_id = node->val;
|
||||
struct clone_root *root;
|
||||
|
||||
root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
|
||||
sctx->clone_roots_cnt, sizeof(struct clone_root),
|
||||
__clone_root_cmp_bsearch);
|
||||
if (!root)
|
||||
continue;
|
||||
|
||||
/* Too many roots, just exit, no worries as caching is optional. */
|
||||
if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
|
||||
kfree(new_entry);
|
||||
return;
|
||||
}
|
||||
|
||||
new_entry->root_ids[new_entry->num_roots] = root_id;
|
||||
new_entry->num_roots++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We may not have added any roots to the new cache entry, which means
|
||||
* none of the roots is part of the list of roots from which we are
|
||||
* allowed to clone. Cache the new entry as it's still useful to avoid
|
||||
* backref walking to determine which roots have a path to the leaf.
|
||||
*/
|
||||
|
||||
if (sctx->backref_cache.size >= SEND_MAX_BACKREF_CACHE_SIZE) {
|
||||
struct backref_cache_entry *lru_entry;
|
||||
struct backref_cache_entry *mt_entry;
|
||||
|
||||
lru_entry = list_first_entry(&sctx->backref_cache.lru_list,
|
||||
struct backref_cache_entry, list);
|
||||
mt_entry = mtree_erase(&sctx->backref_cache.entries, lru_entry->key);
|
||||
ASSERT(mt_entry == lru_entry);
|
||||
list_del(&mt_entry->list);
|
||||
kfree(mt_entry);
|
||||
sctx->backref_cache.size--;
|
||||
}
|
||||
|
||||
ret = mtree_insert(&sctx->backref_cache.entries, new_entry->key,
|
||||
new_entry, GFP_NOFS);
|
||||
ASSERT(ret == 0 || ret == -ENOMEM);
|
||||
if (ret) {
|
||||
/* Caching is optional, no worries. */
|
||||
kfree(new_entry);
|
||||
return;
|
||||
}
|
||||
|
||||
list_add_tail(&new_entry->list, &sctx->backref_cache.lru_list);
|
||||
|
||||
/*
|
||||
* We are called from iterate_extent_inodes() while either holding a
|
||||
* transaction handle or holding fs_info->commit_root_sem, so no need
|
||||
* to take any lock here.
|
||||
*/
|
||||
if (sctx->backref_cache.size == 0)
|
||||
sctx->backref_cache.last_reloc_trans = fs_info->last_reloc_trans;
|
||||
|
||||
sctx->backref_cache.size++;
|
||||
}
|
||||
|
||||
static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
|
||||
const struct extent_buffer *leaf, void *ctx)
|
||||
{
|
||||
const u64 refs = btrfs_extent_refs(leaf, ei);
|
||||
const struct backref_ctx *bctx = ctx;
|
||||
const struct send_ctx *sctx = bctx->sctx;
|
||||
|
||||
if (bytenr == bctx->bytenr) {
|
||||
const u64 flags = btrfs_extent_flags(leaf, ei);
|
||||
|
||||
if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
|
||||
return -EUCLEAN;
|
||||
|
||||
/*
|
||||
* If we have only one reference and only the send root as a
|
||||
* clone source - meaning no clone roots were given in the
|
||||
* struct btrfs_ioctl_send_args passed to the send ioctl - then
|
||||
* it's our reference and there's no point in doing backref
|
||||
* walking which is expensive, so exit early.
|
||||
*/
|
||||
if (refs == 1 && sctx->clone_roots_cnt == 1)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Backreference walking (iterate_extent_inodes() below) is currently
|
||||
* too expensive when an extent has a large number of references, both
|
||||
* in time spent and used memory. So for now just fallback to write
|
||||
* operations instead of clone operations when an extent has more than
|
||||
* a certain amount of references.
|
||||
*/
|
||||
if (refs > SEND_MAX_EXTENT_REFS)
|
||||
return -ENOENT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
|
||||
{
|
||||
const struct backref_ctx *bctx = ctx;
|
||||
|
||||
if (ino == bctx->cur_objectid &&
|
||||
root == bctx->backref_owner &&
|
||||
offset == bctx->backref_offset)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an inode, offset and extent item, it finds a good clone for a clone
|
||||
* instruction. Returns -ENOENT when none could be found. The function makes
|
||||
|
@ -1348,79 +1595,36 @@ static int find_extent_clone(struct send_ctx *sctx,
|
|||
u64 logical;
|
||||
u64 disk_byte;
|
||||
u64 num_bytes;
|
||||
u64 extent_item_pos;
|
||||
u64 flags = 0;
|
||||
struct btrfs_file_extent_item *fi;
|
||||
struct extent_buffer *eb = path->nodes[0];
|
||||
struct backref_ctx backref_ctx = {0};
|
||||
struct backref_ctx backref_ctx = { 0 };
|
||||
struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
|
||||
struct clone_root *cur_clone_root;
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_path *tmp_path;
|
||||
struct btrfs_extent_item *ei;
|
||||
int compressed;
|
||||
u32 i;
|
||||
|
||||
tmp_path = alloc_path_for_send();
|
||||
if (!tmp_path)
|
||||
return -ENOMEM;
|
||||
|
||||
/* We only use this path under the commit sem */
|
||||
tmp_path->need_commit_sem = 0;
|
||||
|
||||
if (data_offset >= ino_size) {
|
||||
/*
|
||||
* There may be extents that lie behind the file's size.
|
||||
* I at least had this in combination with snapshotting while
|
||||
* writing large files.
|
||||
* With fallocate we can get prealloc extents beyond the inode's i_size,
|
||||
* so we don't do anything here because clone operations can not clone
|
||||
* to a range beyond i_size without increasing the i_size of the
|
||||
* destination inode.
|
||||
*/
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
if (data_offset >= ino_size)
|
||||
return 0;
|
||||
|
||||
fi = btrfs_item_ptr(eb, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
|
||||
extent_type = btrfs_file_extent_type(eb, fi);
|
||||
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
compressed = btrfs_file_extent_compression(eb, fi);
|
||||
if (extent_type == BTRFS_FILE_EXTENT_INLINE)
|
||||
return -ENOENT;
|
||||
|
||||
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
|
||||
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
|
||||
if (disk_byte == 0) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
if (disk_byte == 0)
|
||||
return -ENOENT;
|
||||
|
||||
compressed = btrfs_file_extent_compression(eb, fi);
|
||||
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
|
||||
logical = disk_byte + btrfs_file_extent_offset(eb, fi);
|
||||
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
ret = extent_from_logical(fs_info, disk_byte, tmp_path,
|
||||
&found_key, &flags);
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
|
||||
struct btrfs_extent_item);
|
||||
/*
|
||||
* Backreference walking (iterate_extent_inodes() below) is currently
|
||||
* too expensive when an extent has a large number of references, both
|
||||
* in time spent and used memory. So for now just fallback to write
|
||||
* operations instead of clone operations when an extent has more than
|
||||
* a certain amount of references.
|
||||
*/
|
||||
if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
btrfs_release_path(tmp_path);
|
||||
|
||||
/*
|
||||
* Setup the clone roots.
|
||||
*/
|
||||
|
@ -1428,37 +1632,59 @@ static int find_extent_clone(struct send_ctx *sctx,
|
|||
cur_clone_root = sctx->clone_roots + i;
|
||||
cur_clone_root->ino = (u64)-1;
|
||||
cur_clone_root->offset = 0;
|
||||
cur_clone_root->found_refs = 0;
|
||||
cur_clone_root->num_bytes = 0;
|
||||
cur_clone_root->found_ref = false;
|
||||
}
|
||||
|
||||
backref_ctx.sctx = sctx;
|
||||
backref_ctx.found = 0;
|
||||
backref_ctx.cur_objectid = ino;
|
||||
backref_ctx.cur_offset = data_offset;
|
||||
backref_ctx.found_itself = 0;
|
||||
backref_ctx.extent_len = num_bytes;
|
||||
backref_ctx.bytenr = disk_byte;
|
||||
/*
|
||||
* Use the header owner and not the send root's id, because in case of a
|
||||
* snapshot we can have shared subtrees.
|
||||
*/
|
||||
backref_ctx.backref_owner = btrfs_header_owner(eb);
|
||||
backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
|
||||
|
||||
/*
|
||||
* The last extent of a file may be too large due to page alignment.
|
||||
* We need to adjust extent_len in this case so that the checks in
|
||||
* __iterate_backrefs work.
|
||||
* iterate_backrefs() work.
|
||||
*/
|
||||
if (data_offset + num_bytes >= ino_size)
|
||||
backref_ctx.extent_len = ino_size - data_offset;
|
||||
else
|
||||
backref_ctx.extent_len = num_bytes;
|
||||
|
||||
/*
|
||||
* Now collect all backrefs.
|
||||
*/
|
||||
backref_walk_ctx.bytenr = disk_byte;
|
||||
if (compressed == BTRFS_COMPRESS_NONE)
|
||||
extent_item_pos = logical - found_key.objectid;
|
||||
else
|
||||
extent_item_pos = 0;
|
||||
ret = iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, 1, __iterate_backrefs,
|
||||
&backref_ctx, false);
|
||||
backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
|
||||
backref_walk_ctx.fs_info = fs_info;
|
||||
backref_walk_ctx.cache_lookup = lookup_backref_cache;
|
||||
backref_walk_ctx.cache_store = store_backref_cache;
|
||||
backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
|
||||
backref_walk_ctx.check_extent_item = check_extent_item;
|
||||
backref_walk_ctx.user_ctx = &backref_ctx;
|
||||
|
||||
/*
|
||||
* If we have a single clone root, then it's the send root and we can tell
|
||||
* the backref walking code to skip our own backref and not resolve it,
|
||||
* since we can not use it for cloning - the source and destination
|
||||
* ranges can't overlap and in case the leaf is shared through a subtree
|
||||
* due to snapshots, we can't use those other roots since they are not
|
||||
* in the list of clone roots.
|
||||
*/
|
||||
if (sctx->clone_roots_cnt == 1)
|
||||
backref_walk_ctx.skip_data_ref = skip_self_data_ref;
|
||||
|
||||
ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
|
||||
&backref_ctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
|
||||
|
@ -1475,37 +1701,42 @@ static int find_extent_clone(struct send_ctx *sctx,
|
|||
* was already reallocated after the relocation.
|
||||
*/
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
return -ENOENT;
|
||||
}
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
|
||||
if (!backref_ctx.found_itself) {
|
||||
/* found a bug in backref code? */
|
||||
ret = -EIO;
|
||||
btrfs_err(fs_info,
|
||||
"did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
|
||||
ino, data_offset, disk_byte, found_key.objectid);
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_debug(fs_info,
|
||||
"find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
|
||||
data_offset, ino, num_bytes, logical);
|
||||
|
||||
if (!backref_ctx.found)
|
||||
if (!backref_ctx.found) {
|
||||
btrfs_debug(fs_info, "no clones found");
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
cur_clone_root = NULL;
|
||||
for (i = 0; i < sctx->clone_roots_cnt; i++) {
|
||||
if (sctx->clone_roots[i].found_refs) {
|
||||
if (!cur_clone_root)
|
||||
cur_clone_root = sctx->clone_roots + i;
|
||||
else if (sctx->clone_roots[i].root == sctx->send_root)
|
||||
/* prefer clones from send_root over others */
|
||||
cur_clone_root = sctx->clone_roots + i;
|
||||
}
|
||||
struct clone_root *clone_root = &sctx->clone_roots[i];
|
||||
|
||||
if (!clone_root->found_ref)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Choose the root from which we can clone more bytes, to
|
||||
* minimize write operations and therefore have more extent
|
||||
* sharing at the destination (the same as in the source).
|
||||
*/
|
||||
if (!cur_clone_root ||
|
||||
clone_root->num_bytes > cur_clone_root->num_bytes) {
|
||||
cur_clone_root = clone_root;
|
||||
|
||||
/*
|
||||
* We found an optimal clone candidate (any inode from
|
||||
* any root is fine), so we're done.
|
||||
*/
|
||||
if (clone_root->num_bytes >= backref_ctx.extent_len)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_clone_root) {
|
||||
|
@ -1515,8 +1746,6 @@ static int find_extent_clone(struct send_ctx *sctx,
|
|||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
out:
|
||||
btrfs_free_path(tmp_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1596,13 +1825,17 @@ static int gen_unique_name(struct send_ctx *sctx,
|
|||
return -ENOMEM;
|
||||
|
||||
while (1) {
|
||||
struct fscrypt_str tmp_name;
|
||||
|
||||
len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
|
||||
ino, gen, idx);
|
||||
ASSERT(len < sizeof(tmp));
|
||||
tmp_name.name = tmp;
|
||||
tmp_name.len = strlen(tmp);
|
||||
|
||||
di = btrfs_lookup_dir_item(NULL, sctx->send_root,
|
||||
path, BTRFS_FIRST_FREE_OBJECTID,
|
||||
tmp, strlen(tmp), 0);
|
||||
&tmp_name, 0);
|
||||
btrfs_release_path(path);
|
||||
if (IS_ERR(di)) {
|
||||
ret = PTR_ERR(di);
|
||||
|
@ -1622,7 +1855,7 @@ static int gen_unique_name(struct send_ctx *sctx,
|
|||
|
||||
di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
|
||||
path, BTRFS_FIRST_FREE_OBJECTID,
|
||||
tmp, strlen(tmp), 0);
|
||||
&tmp_name, 0);
|
||||
btrfs_release_path(path);
|
||||
if (IS_ERR(di)) {
|
||||
ret = PTR_ERR(di);
|
||||
|
@ -1752,13 +1985,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
|
|||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_path *path;
|
||||
struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
|
||||
|
||||
path = alloc_path_for_send();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
di = btrfs_lookup_dir_item(NULL, root, path,
|
||||
dir, name, name_len, 0);
|
||||
di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
|
||||
if (IS_ERR_OR_NULL(di)) {
|
||||
ret = di ? PTR_ERR(di) : -ENOENT;
|
||||
goto out;
|
||||
|
@ -7863,6 +8096,9 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
|
|||
INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
|
||||
INIT_LIST_HEAD(&sctx->name_cache_list);
|
||||
|
||||
INIT_LIST_HEAD(&sctx->backref_cache.lru_list);
|
||||
mt_init(&sctx->backref_cache.entries);
|
||||
|
||||
sctx->flags = arg->flags;
|
||||
|
||||
if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
|
||||
|
@ -7901,7 +8137,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
|
|||
if (sctx->proto >= 2) {
|
||||
u32 send_buf_num_pages;
|
||||
|
||||
sctx->send_max_size = ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE);
|
||||
sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
|
||||
sctx->send_buf = vmalloc(sctx->send_max_size);
|
||||
if (!sctx->send_buf) {
|
||||
ret = -ENOMEM;
|
||||
|
@ -8125,6 +8361,8 @@ out:
|
|||
|
||||
close_current_inode(sctx);
|
||||
|
||||
empty_backref_cache(sctx);
|
||||
|
||||
kfree(sctx);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,10 +18,12 @@
|
|||
#endif
|
||||
|
||||
/*
|
||||
* In send stream v1, no command is larger than 64K. In send stream v2, no limit
|
||||
* should be assumed.
|
||||
* In send stream v1, no command is larger than 64K. In send stream v2, no
|
||||
* limit should be assumed, the buffer size is set to be a header with
|
||||
* compressed extent size.
|
||||
*/
|
||||
#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
|
||||
#define BTRFS_SEND_BUF_SIZE_V2 ALIGN(SZ_16K + BTRFS_MAX_COMPRESSED, PAGE_SIZE)
|
||||
|
||||
struct inode;
|
||||
struct btrfs_ioctl_send_args;
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
#include "transaction.h"
|
||||
#include "block-group.h"
|
||||
#include "zoned.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
|
||||
/*
|
||||
* HOW DOES SPACE RESERVATION WORK
|
||||
|
@ -856,7 +859,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
|
|||
u64 thresh;
|
||||
u64 used;
|
||||
|
||||
thresh = div_factor_fine(total, 90);
|
||||
thresh = mult_perc(total, 90);
|
||||
|
||||
lockdep_assert_held(&space_info->lock);
|
||||
|
||||
|
@ -974,7 +977,7 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
|
|||
return false;
|
||||
|
||||
spin_lock(&global_rsv->lock);
|
||||
min_bytes = div_factor(global_rsv->size, 1);
|
||||
min_bytes = mult_perc(global_rsv->size, 10);
|
||||
if (global_rsv->reserved < min_bytes + ticket->bytes) {
|
||||
spin_unlock(&global_rsv->lock);
|
||||
return false;
|
||||
|
@ -1490,8 +1493,8 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
|
|||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the appropriate flushing and waiting for a ticket
|
||||
/*
|
||||
* Do the appropriate flushing and waiting for a ticket.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @space_info: space info for the reservation
|
||||
|
@ -1583,8 +1586,18 @@ static inline bool can_steal(enum btrfs_reserve_flush_enum flush)
|
|||
flush == BTRFS_RESERVE_FLUSH_EVICT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to reserve bytes from the block_rsv's space
|
||||
/*
|
||||
* NO_FLUSH and FLUSH_EMERGENCY don't want to create a ticket, they just want to
|
||||
* fail as quickly as possible.
|
||||
*/
|
||||
static inline bool can_ticket(enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
return (flush != BTRFS_RESERVE_NO_FLUSH &&
|
||||
flush != BTRFS_RESERVE_FLUSH_EMERGENCY);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to reserve bytes from the block_rsv's space.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @space_info: space info we want to allocate from
|
||||
|
@ -1644,6 +1657,21 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
|||
ret = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Things are dire, we need to make a reservation so we don't abort. We
|
||||
* will let this reservation go through as long as we have actual space
|
||||
* left to allocate for the block.
|
||||
*/
|
||||
if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
|
||||
used = btrfs_space_info_used(space_info, false);
|
||||
if (used + orig_bytes <=
|
||||
writable_total_bytes(fs_info, space_info)) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
|
||||
orig_bytes);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we couldn't make a reservation then setup our reservation ticket
|
||||
* and kick the async worker if it's not already running.
|
||||
|
@ -1651,7 +1679,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
|||
* If we are a priority flusher then we just need to add our ticket to
|
||||
* the list and we will do our own flushing further down.
|
||||
*/
|
||||
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
|
||||
if (ret && can_ticket(flush)) {
|
||||
ticket.bytes = orig_bytes;
|
||||
ticket.error = 0;
|
||||
space_info->reclaim_size += ticket.bytes;
|
||||
|
@ -1701,15 +1729,15 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
|
||||
if (!ret || !can_ticket(flush))
|
||||
return ret;
|
||||
|
||||
return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
|
||||
orig_bytes, flush);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trye to reserve metadata bytes from the block_rsv's space
|
||||
/*
|
||||
* Try to reserve metadata bytes from the block_rsv's space.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @block_rsv: block_rsv we're allocating for
|
||||
|
@ -1743,8 +1771,8 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to reserve data bytes for an allocation
|
||||
/*
|
||||
* Try to reserve data bytes for an allocation.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @bytes: number of bytes we need
|
||||
|
@ -1787,3 +1815,37 @@ __cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
|
|||
}
|
||||
dump_global_block_rsv(fs_info);
|
||||
}
|
||||
|
||||
/*
|
||||
* Account the unused space of all the readonly block groups in the space_info.
|
||||
* Takes mirrors into account.
|
||||
*/
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 free_bytes = 0;
|
||||
int factor;
|
||||
|
||||
/* It's df, we don't care if it's racy */
|
||||
if (list_empty(&sinfo->ro_bgs))
|
||||
return 0;
|
||||
|
||||
spin_lock(&sinfo->lock);
|
||||
list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
if (!block_group->ro) {
|
||||
spin_unlock(&block_group->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
factor = btrfs_bg_type_to_factor(block_group->flags);
|
||||
free_bytes += (block_group->length -
|
||||
block_group->used) * factor;
|
||||
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
spin_unlock(&sinfo->lock);
|
||||
|
||||
return free_bytes;
|
||||
}
|
||||
|
|
|
@ -5,6 +5,83 @@
|
|||
|
||||
#include "volumes.h"
|
||||
|
||||
/*
|
||||
* Different levels of space flushing when doing space reservations.
|
||||
*
|
||||
* The higher the level, the more methods we try to reclaim space.
|
||||
*/
|
||||
enum btrfs_reserve_flush_enum {
|
||||
/* If we are in the transaction, we can't flush anything.*/
|
||||
BTRFS_RESERVE_NO_FLUSH,
|
||||
|
||||
/*
|
||||
* Flush space by:
|
||||
* - Running delayed inode items
|
||||
* - Allocating a new chunk
|
||||
*/
|
||||
BTRFS_RESERVE_FLUSH_LIMIT,
|
||||
|
||||
/*
|
||||
* Flush space by:
|
||||
* - Running delayed inode items
|
||||
* - Running delayed refs
|
||||
* - Running delalloc and waiting for ordered extents
|
||||
* - Allocating a new chunk
|
||||
*/
|
||||
BTRFS_RESERVE_FLUSH_EVICT,
|
||||
|
||||
/*
|
||||
* Flush space by above mentioned methods and by:
|
||||
* - Running delayed iputs
|
||||
* - Committing transaction
|
||||
*
|
||||
* Can be interrupted by a fatal signal.
|
||||
*/
|
||||
BTRFS_RESERVE_FLUSH_DATA,
|
||||
BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE,
|
||||
BTRFS_RESERVE_FLUSH_ALL,
|
||||
|
||||
/*
|
||||
* Pretty much the same as FLUSH_ALL, but can also steal space from
|
||||
* global rsv.
|
||||
*
|
||||
* Can be interrupted by a fatal signal.
|
||||
*/
|
||||
BTRFS_RESERVE_FLUSH_ALL_STEAL,
|
||||
|
||||
/*
|
||||
* This is for btrfs_use_block_rsv only. We have exhausted our block
|
||||
* rsv and our global block rsv. This can happen for things like
|
||||
* delalloc where we are overwriting a lot of extents with a single
|
||||
* extent and didn't reserve enough space. Alternatively it can happen
|
||||
* with delalloc where we reserve 1 extent's worth for a large extent but
|
||||
* fragmentation leads to multiple extents being created. This will
|
||||
* give us the reservation in the case of
|
||||
*
|
||||
* if (num_bytes < (space_info->total_bytes -
|
||||
* btrfs_space_info_used(space_info, false))
|
||||
*
|
||||
* Which ignores bytes_may_use. This is potentially dangerous, but our
|
||||
* reservation system is generally pessimistic so is able to absorb this
|
||||
* style of mistake.
|
||||
*/
|
||||
BTRFS_RESERVE_FLUSH_EMERGENCY,
|
||||
};
|
||||
|
||||
enum btrfs_flush_state {
|
||||
FLUSH_DELAYED_ITEMS_NR = 1,
|
||||
FLUSH_DELAYED_ITEMS = 2,
|
||||
FLUSH_DELAYED_REFS_NR = 3,
|
||||
FLUSH_DELAYED_REFS = 4,
|
||||
FLUSH_DELALLOC = 5,
|
||||
FLUSH_DELALLOC_WAIT = 6,
|
||||
FLUSH_DELALLOC_FULL = 7,
|
||||
ALLOC_CHUNK = 8,
|
||||
ALLOC_CHUNK_FORCE = 9,
|
||||
RUN_DELAYED_IPUTS = 10,
|
||||
COMMIT_TRANS = 11,
|
||||
};
|
||||
|
||||
struct btrfs_space_info {
|
||||
spinlock_t lock;
|
||||
|
||||
|
@ -159,5 +236,6 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
|
|||
enum btrfs_reserve_flush_enum flush);
|
||||
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
|
||||
|
||||
#endif /* BTRFS_SPACE_INFO_H */
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "subpage.h"
|
||||
#include "btrfs_inode.h"
|
||||
|
|
554
fs/btrfs/super.c
554
fs/btrfs/super.c
|
@ -26,6 +26,7 @@
|
|||
#include <linux/ratelimit.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/btrfs.h>
|
||||
#include "messages.h"
|
||||
#include "delayed-inode.h"
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
|
@ -34,7 +35,7 @@
|
|||
#include "print-tree.h"
|
||||
#include "props.h"
|
||||
#include "xattr.h"
|
||||
#include "volumes.h"
|
||||
#include "bio.h"
|
||||
#include "export.h"
|
||||
#include "compression.h"
|
||||
#include "rcu-string.h"
|
||||
|
@ -49,6 +50,14 @@
|
|||
#include "discard.h"
|
||||
#include "qgroup.h"
|
||||
#include "raid56.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "defrag.h"
|
||||
#include "dir-item.h"
|
||||
#include "ioctl.h"
|
||||
#include "scrub.h"
|
||||
#include "verity.h"
|
||||
#include "super.h"
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/btrfs.h>
|
||||
|
||||
|
@ -67,328 +76,6 @@ static struct file_system_type btrfs_root_fs_type;
|
|||
|
||||
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
|
||||
#define STATE_STRING_PREFACE ": state "
|
||||
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
|
||||
|
||||
/*
|
||||
* Characters to print to indicate error conditions or uncommon filesystem state.
|
||||
* RO is not an error.
|
||||
*/
|
||||
static const char fs_state_chars[] = {
|
||||
[BTRFS_FS_STATE_ERROR] = 'E',
|
||||
[BTRFS_FS_STATE_REMOUNTING] = 'M',
|
||||
[BTRFS_FS_STATE_RO] = 0,
|
||||
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
|
||||
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
|
||||
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
|
||||
[BTRFS_FS_STATE_NO_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
|
||||
};
|
||||
|
||||
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
|
||||
{
|
||||
unsigned int bit;
|
||||
bool states_printed = false;
|
||||
unsigned long fs_state = READ_ONCE(info->fs_state);
|
||||
char *curr = buf;
|
||||
|
||||
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
|
||||
curr += sizeof(STATE_STRING_PREFACE) - 1;
|
||||
|
||||
for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
|
||||
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
|
||||
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
|
||||
*curr++ = fs_state_chars[bit];
|
||||
states_printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* If no states were printed, reset the buffer */
|
||||
if (!states_printed)
|
||||
curr = buf;
|
||||
|
||||
*curr++ = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Generally the error codes correspond to their respective errors, but there
|
||||
* are a few special cases.
|
||||
*
|
||||
* EUCLEAN: Any sort of corruption that we encounter. The tree-checker for
|
||||
* instance will return EUCLEAN if any of the blocks are corrupted in
|
||||
* a way that is problematic. We want to reserve EUCLEAN for these
|
||||
* sort of corruptions.
|
||||
*
|
||||
* EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
|
||||
* need to use EROFS for this case. We will have no idea of the
|
||||
* original failure, that will have been reported at the time we tripped
|
||||
* over the error. Each subsequent error that doesn't have any context
|
||||
* of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
|
||||
*/
|
||||
const char * __attribute_const__ btrfs_decode_error(int errno)
|
||||
{
|
||||
char *errstr = "unknown";
|
||||
|
||||
switch (errno) {
|
||||
case -ENOENT: /* -2 */
|
||||
errstr = "No such entry";
|
||||
break;
|
||||
case -EIO: /* -5 */
|
||||
errstr = "IO failure";
|
||||
break;
|
||||
case -ENOMEM: /* -12*/
|
||||
errstr = "Out of memory";
|
||||
break;
|
||||
case -EEXIST: /* -17 */
|
||||
errstr = "Object already exists";
|
||||
break;
|
||||
case -ENOSPC: /* -28 */
|
||||
errstr = "No space left";
|
||||
break;
|
||||
case -EROFS: /* -30 */
|
||||
errstr = "Readonly filesystem";
|
||||
break;
|
||||
case -EOPNOTSUPP: /* -95 */
|
||||
errstr = "Operation not supported";
|
||||
break;
|
||||
case -EUCLEAN: /* -117 */
|
||||
errstr = "Filesystem corrupted";
|
||||
break;
|
||||
case -EDQUOT: /* -122 */
|
||||
errstr = "Quota exceeded";
|
||||
break;
|
||||
}
|
||||
|
||||
return errstr;
|
||||
}
|
||||
|
||||
/*
|
||||
* __btrfs_handle_fs_error decodes expected errors from the caller and
|
||||
* invokes the appropriate error response.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
#ifdef CONFIG_PRINTK
|
||||
char statestr[STATE_STRING_BUF_LEN];
|
||||
const char *errstr;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Special case: if the error is EROFS, and we're already
|
||||
* under SB_RDONLY, then it is safe here.
|
||||
*/
|
||||
if (errno == -EROFS && sb_rdonly(sb))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
errstr = btrfs_decode_error(errno);
|
||||
btrfs_state_to_string(fs_info, statestr);
|
||||
if (fmt) {
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr, &vaf);
|
||||
va_end(args);
|
||||
} else {
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Today we only save the error info to memory. Long term we'll
|
||||
* also send it down to the disk
|
||||
*/
|
||||
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
|
||||
|
||||
/* Don't go through full error handling during mount */
|
||||
if (!(sb->s_flags & SB_BORN))
|
||||
return;
|
||||
|
||||
if (sb_rdonly(sb))
|
||||
return;
|
||||
|
||||
btrfs_discard_stop(fs_info);
|
||||
|
||||
/* btrfs handle error by forcing the filesystem readonly */
|
||||
btrfs_set_sb_rdonly(sb);
|
||||
btrfs_info(fs_info, "forced readonly");
|
||||
/*
|
||||
* Note that a running device replace operation is not canceled here
|
||||
* although there is no way to update the progress. It would add the
|
||||
* risk of a deadlock, therefore the canceling is omitted. The only
|
||||
* penalty is that some I/O remains active until the procedure
|
||||
* completes. The next time when the filesystem is mounted writable
|
||||
* again, the device replace operation continues.
|
||||
*/
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
static const char * const logtypes[] = {
|
||||
"emergency",
|
||||
"alert",
|
||||
"critical",
|
||||
"error",
|
||||
"warning",
|
||||
"notice",
|
||||
"info",
|
||||
"debug",
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Use one ratelimit state per log level so that a flood of less important
|
||||
* messages doesn't cause more important ones to be dropped.
|
||||
*/
|
||||
static struct ratelimit_state printk_limits[] = {
|
||||
RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
|
||||
};
|
||||
|
||||
/*
 * Print a btrfs message, prefixed with the log type and, when @fs_info is
 * given, the device name and filesystem state string.
 *
 * @fs_info: filesystem the message is about, may be NULL
 * @fmt:     printf-style format, optionally starting with KERN_<level> headers
 *
 * Output is rate limited per log level through printk_limits[].
 */
void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
	/* Index into logtypes[] and printk_limits[]; entry 4 is "warning". */
	unsigned int idx = 4;
	struct va_format vaf;
	va_list args;
	int kern_level;

	va_start(args, fmt);

	/* Strip every leading level header; the last numeric one is used. */
	for (;;) {
		size_t hdr_len;

		kern_level = printk_get_level(fmt);
		if (kern_level == 0)
			break;

		hdr_len = printk_skip_level(fmt) - fmt;
		if (kern_level >= '0' && kern_level <= '7') {
			memcpy(lvl, fmt, hdr_len);
			lvl[hdr_len] = '\0';
			idx = kern_level - '0';
		}
		fmt += hdr_len;
	}

	vaf.fmt = fmt;
	vaf.va = &args;

	if (__ratelimit(&printk_limits[idx])) {
		const char *type = logtypes[idx];

		if (fs_info) {
			char statestr[STATE_STRING_BUF_LEN];

			btrfs_state_to_string(fs_info, statestr);
			_printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
				fs_info->sb->s_id, statestr, &vaf);
		} else {
			_printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
		}
	}

	va_end(args);
}
|
||||
#endif
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
 * Warn that logical addresses are reaching the 32bit limit.  The
 * BTRFS_FS_32BIT_WARN flag makes sure the messages are printed only once per
 * filesystem.
 */
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
	/* Already warned for this filesystem. */
	if (test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags))
		return;

	btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
	btrfs_warn(fs_info,
"due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
		   BTRFS_32BIT_MAX_FILE_SIZE >> 40);
	btrfs_warn(fs_info,
		   "please consider upgrading to 64bit kernel/hardware");
}
|
||||
|
||||
/*
 * Report that logical addresses have reached the 32bit limit.  The
 * BTRFS_FS_32BIT_ERROR flag makes sure the messages are printed only once per
 * filesystem.
 */
void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
{
	/* Already reported for this filesystem. */
	if (test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags))
		return;

	btrfs_err(fs_info, "reached 32bit limit for logical addresses");
	btrfs_err(fs_info,
"due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
		  BTRFS_32BIT_MAX_FILE_SIZE >> 40);
	btrfs_err(fs_info,
		  "please consider upgrading to 64bit kernel/hardware");
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We only mark the transaction aborted and then set the file system read-only.
|
||||
* This will prevent new transactions from starting or trying to join this
|
||||
* one.
|
||||
*
|
||||
* This means that error recovery at the call site is limited to freeing
|
||||
* any local memory allocations and passing the error code up without
|
||||
* further cleanup. The transaction should complete as it normally would
|
||||
* in the call path but will return -EIO.
|
||||
*
|
||||
* We'll complete the cleanup in btrfs_end_transaction and
|
||||
* btrfs_commit_transaction.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
const char *function,
|
||||
unsigned int line, int errno, bool first_hit)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
WRITE_ONCE(trans->aborted, errno);
|
||||
WRITE_ONCE(trans->transaction->aborted, errno);
|
||||
if (first_hit && errno == -ENOSPC)
|
||||
btrfs_dump_space_info_for_trans_abort(fs_info);
|
||||
/* Wake up anybody who may be waiting on this transaction */
|
||||
wake_up(&fs_info->transaction_wait);
|
||||
wake_up(&fs_info->transaction_blocked_wait);
|
||||
__btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
|
||||
}
|
||||
/*
|
||||
* __btrfs_panic decodes unexpected, fatal errors from the caller,
|
||||
* issues an alert, and either panics or BUGs, depending on mount options.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
char *s_id = "<unknown>";
|
||||
const char *errstr;
|
||||
struct va_format vaf = { .fmt = fmt };
|
||||
va_list args;
|
||||
|
||||
if (fs_info)
|
||||
s_id = fs_info->sb->s_id;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.va = &args;
|
||||
|
||||
errstr = btrfs_decode_error(errno);
|
||||
if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
|
||||
panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
|
||||
s_id, function, line, &vaf, errno, errstr);
|
||||
|
||||
btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
|
||||
function, line, &vaf, errno, errstr);
|
||||
va_end(args);
|
||||
/* Caller calls BUG() */
|
||||
}
|
||||
|
||||
static void btrfs_put_super(struct super_block *sb)
|
||||
{
|
||||
close_ctree(btrfs_sb(sb));
|
||||
|
@ -918,12 +605,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
|||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
btrfs_clear_opt(info->mount_opt, NODISCARD);
|
||||
break;
|
||||
case Opt_nodiscard:
|
||||
btrfs_clear_and_info(info, DISCARD_SYNC,
|
||||
"turning off discard");
|
||||
btrfs_clear_and_info(info, DISCARD_ASYNC,
|
||||
"turning off async discard");
|
||||
btrfs_set_opt(info->mount_opt, NODISCARD);
|
||||
break;
|
||||
case Opt_space_cache:
|
||||
case Opt_space_cache_version:
|
||||
|
@ -1394,6 +1083,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
|
|||
struct btrfs_dir_item *di;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key location;
|
||||
struct fscrypt_str name = FSTR_INIT("default", 7);
|
||||
u64 dir_id;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
@ -1406,7 +1096,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
|
|||
* to mount.
|
||||
*/
|
||||
dir_id = btrfs_super_root_dir(fs_info->super_copy);
|
||||
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
|
||||
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
|
||||
if (IS_ERR(di)) {
|
||||
btrfs_free_path(path);
|
||||
return PTR_ERR(di);
|
||||
|
@ -1507,7 +1197,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
|||
* Exit unless we have some pending changes
|
||||
* that need to go through commit
|
||||
*/
|
||||
if (fs_info->pending_changes == 0)
|
||||
if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT,
|
||||
&fs_info->flags))
|
||||
return 0;
|
||||
/*
|
||||
* A non-blocking test if the fs is frozen. We must not
|
||||
|
@ -2645,7 +2336,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
|
|||
* the end of RCU grace period.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
|
||||
seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\");
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
|
@ -2694,7 +2385,7 @@ static __cold void btrfs_interface_exit(void)
|
|||
misc_deregister(&btrfs_misc);
|
||||
}
|
||||
|
||||
static void __init btrfs_print_mod_info(void)
|
||||
static int __init btrfs_print_mod_info(void)
|
||||
{
|
||||
static const char options[] = ""
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
|
@ -2721,122 +2412,125 @@ static void __init btrfs_print_mod_info(void)
|
|||
#endif
|
||||
;
|
||||
pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init init_btrfs_fs(void)
|
||||
static int register_btrfs(void)
|
||||
{
|
||||
int err;
|
||||
return register_filesystem(&btrfs_fs_type);
|
||||
}
|
||||
|
||||
btrfs_props_init();
|
||||
static void unregister_btrfs(void)
|
||||
{
|
||||
unregister_filesystem(&btrfs_fs_type);
|
||||
}
|
||||
|
||||
err = btrfs_init_sysfs();
|
||||
if (err)
|
||||
return err;
|
||||
/* Helper structure for long init/exit functions. */
|
||||
struct init_sequence {
|
||||
int (*init_func)(void);
|
||||
/* Can be NULL if the init_func doesn't need cleanup. */
|
||||
void (*exit_func)(void);
|
||||
};
|
||||
|
||||
btrfs_init_compress();
|
||||
static const struct init_sequence mod_init_seq[] = {
|
||||
{
|
||||
.init_func = btrfs_props_init,
|
||||
.exit_func = NULL,
|
||||
}, {
|
||||
.init_func = btrfs_init_sysfs,
|
||||
.exit_func = btrfs_exit_sysfs,
|
||||
}, {
|
||||
.init_func = btrfs_init_compress,
|
||||
.exit_func = btrfs_exit_compress,
|
||||
}, {
|
||||
.init_func = btrfs_init_cachep,
|
||||
.exit_func = btrfs_destroy_cachep,
|
||||
}, {
|
||||
.init_func = btrfs_transaction_init,
|
||||
.exit_func = btrfs_transaction_exit,
|
||||
}, {
|
||||
.init_func = btrfs_ctree_init,
|
||||
.exit_func = btrfs_ctree_exit,
|
||||
}, {
|
||||
.init_func = btrfs_free_space_init,
|
||||
.exit_func = btrfs_free_space_exit,
|
||||
}, {
|
||||
.init_func = extent_state_init_cachep,
|
||||
.exit_func = extent_state_free_cachep,
|
||||
}, {
|
||||
.init_func = extent_buffer_init_cachep,
|
||||
.exit_func = extent_buffer_free_cachep,
|
||||
}, {
|
||||
.init_func = btrfs_bioset_init,
|
||||
.exit_func = btrfs_bioset_exit,
|
||||
}, {
|
||||
.init_func = extent_map_init,
|
||||
.exit_func = extent_map_exit,
|
||||
}, {
|
||||
.init_func = ordered_data_init,
|
||||
.exit_func = ordered_data_exit,
|
||||
}, {
|
||||
.init_func = btrfs_delayed_inode_init,
|
||||
.exit_func = btrfs_delayed_inode_exit,
|
||||
}, {
|
||||
.init_func = btrfs_auto_defrag_init,
|
||||
.exit_func = btrfs_auto_defrag_exit,
|
||||
}, {
|
||||
.init_func = btrfs_delayed_ref_init,
|
||||
.exit_func = btrfs_delayed_ref_exit,
|
||||
}, {
|
||||
.init_func = btrfs_prelim_ref_init,
|
||||
.exit_func = btrfs_prelim_ref_exit,
|
||||
}, {
|
||||
.init_func = btrfs_interface_init,
|
||||
.exit_func = btrfs_interface_exit,
|
||||
}, {
|
||||
.init_func = btrfs_print_mod_info,
|
||||
.exit_func = NULL,
|
||||
}, {
|
||||
.init_func = btrfs_run_sanity_tests,
|
||||
.exit_func = NULL,
|
||||
}, {
|
||||
.init_func = register_btrfs,
|
||||
.exit_func = unregister_btrfs,
|
||||
}
|
||||
};
|
||||
|
||||
err = btrfs_init_cachep();
|
||||
if (err)
|
||||
goto free_compress;
|
||||
static bool mod_init_result[ARRAY_SIZE(mod_init_seq)];
|
||||
|
||||
err = extent_state_init_cachep();
|
||||
if (err)
|
||||
goto free_cachep;
|
||||
static __always_inline void btrfs_exit_btrfs_fs(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
err = extent_buffer_init_cachep();
|
||||
if (err)
|
||||
goto free_extent_cachep;
|
||||
|
||||
err = btrfs_bioset_init();
|
||||
if (err)
|
||||
goto free_eb_cachep;
|
||||
|
||||
err = extent_map_init();
|
||||
if (err)
|
||||
goto free_bioset;
|
||||
|
||||
err = ordered_data_init();
|
||||
if (err)
|
||||
goto free_extent_map;
|
||||
|
||||
err = btrfs_delayed_inode_init();
|
||||
if (err)
|
||||
goto free_ordered_data;
|
||||
|
||||
err = btrfs_auto_defrag_init();
|
||||
if (err)
|
||||
goto free_delayed_inode;
|
||||
|
||||
err = btrfs_delayed_ref_init();
|
||||
if (err)
|
||||
goto free_auto_defrag;
|
||||
|
||||
err = btrfs_prelim_ref_init();
|
||||
if (err)
|
||||
goto free_delayed_ref;
|
||||
|
||||
err = btrfs_interface_init();
|
||||
if (err)
|
||||
goto free_prelim_ref;
|
||||
|
||||
btrfs_print_mod_info();
|
||||
|
||||
err = btrfs_run_sanity_tests();
|
||||
if (err)
|
||||
goto unregister_ioctl;
|
||||
|
||||
err = register_filesystem(&btrfs_fs_type);
|
||||
if (err)
|
||||
goto unregister_ioctl;
|
||||
|
||||
return 0;
|
||||
|
||||
unregister_ioctl:
|
||||
btrfs_interface_exit();
|
||||
free_prelim_ref:
|
||||
btrfs_prelim_ref_exit();
|
||||
free_delayed_ref:
|
||||
btrfs_delayed_ref_exit();
|
||||
free_auto_defrag:
|
||||
btrfs_auto_defrag_exit();
|
||||
free_delayed_inode:
|
||||
btrfs_delayed_inode_exit();
|
||||
free_ordered_data:
|
||||
ordered_data_exit();
|
||||
free_extent_map:
|
||||
extent_map_exit();
|
||||
free_bioset:
|
||||
btrfs_bioset_exit();
|
||||
free_eb_cachep:
|
||||
extent_buffer_free_cachep();
|
||||
free_extent_cachep:
|
||||
extent_state_free_cachep();
|
||||
free_cachep:
|
||||
btrfs_destroy_cachep();
|
||||
free_compress:
|
||||
btrfs_exit_compress();
|
||||
btrfs_exit_sysfs();
|
||||
|
||||
return err;
|
||||
for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) {
|
||||
if (!mod_init_result[i])
|
||||
continue;
|
||||
if (mod_init_seq[i].exit_func)
|
||||
mod_init_seq[i].exit_func();
|
||||
mod_init_result[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
static void __exit exit_btrfs_fs(void)
|
||||
{
|
||||
btrfs_destroy_cachep();
|
||||
btrfs_delayed_ref_exit();
|
||||
btrfs_auto_defrag_exit();
|
||||
btrfs_delayed_inode_exit();
|
||||
btrfs_prelim_ref_exit();
|
||||
ordered_data_exit();
|
||||
extent_map_exit();
|
||||
btrfs_bioset_exit();
|
||||
extent_state_free_cachep();
|
||||
extent_buffer_free_cachep();
|
||||
btrfs_interface_exit();
|
||||
unregister_filesystem(&btrfs_fs_type);
|
||||
btrfs_exit_sysfs();
|
||||
btrfs_cleanup_fs_uuids();
|
||||
btrfs_exit_compress();
|
||||
btrfs_exit_btrfs_fs();
|
||||
}
|
||||
|
||||
/*
 * Module init: run every init_func of mod_init_seq[] in table order and
 * record each success in mod_init_result[].
 *
 * On the first negative return value btrfs_exit_btrfs_fs() is called and that
 * value is returned; otherwise returns 0.
 */
static int __init init_btrfs_fs(void)
{
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(mod_init_seq); idx++) {
		int err;

		/* No step may be marked as done before it has run. */
		ASSERT(!mod_init_result[idx]);

		err = mod_init_seq[idx].init_func();
		if (err < 0) {
			btrfs_exit_btrfs_fs();
			return err;
		}
		mod_init_result[idx] = true;
	}

	return 0;
}
|
||||
|
||||
late_initcall(init_btrfs_fs);
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_SUPER_H
|
||||
#define BTRFS_SUPER_H
|
||||
|
||||
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
unsigned long new_flags);
|
||||
int btrfs_sync_fs(struct super_block *sb, int wait);
|
||||
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
u64 subvol_objectid);
|
||||
|
||||
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
|
||||
{
|
||||
return sb->s_fs_info;
|
||||
}
|
||||
|
||||
static inline void btrfs_set_sb_rdonly(struct super_block *sb)
|
||||
{
|
||||
sb->s_flags |= SB_RDONLY;
|
||||
set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
|
||||
}
|
||||
|
||||
static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
|
||||
{
|
||||
sb->s_flags &= ~SB_RDONLY;
|
||||
clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -10,7 +10,7 @@
|
|||
#include <linux/completion.h>
|
||||
#include <linux/bug.h>
|
||||
#include <crypto/hash.h>
|
||||
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "discard.h"
|
||||
#include "disk-io.h"
|
||||
|
@ -22,6 +22,8 @@
|
|||
#include "block-group.h"
|
||||
#include "qgroup.h"
|
||||
#include "misc.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
|
||||
/*
|
||||
* Structure name Path
|
||||
|
@ -248,7 +250,7 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
|
|||
/*
|
||||
* We don't want to do full transaction commit from inside sysfs
|
||||
*/
|
||||
btrfs_set_pending(fs_info, COMMIT);
|
||||
set_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
|
||||
wake_up_process(fs_info->transaction_kthread);
|
||||
|
||||
return count;
|
||||
|
@ -762,7 +764,7 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
|
|||
val = min(val, BTRFS_MAX_DATA_CHUNK_SIZE);
|
||||
|
||||
/* Limit stripe size to 10% of available space. */
|
||||
val = min(div_factor(fs_info->fs_devices->total_rw_bytes, 1), val);
|
||||
val = min(mult_perc(fs_info->fs_devices->total_rw_bytes, 10), val);
|
||||
|
||||
/* Must be multiple of 256M. */
|
||||
val &= ~((u64)SZ_256M - 1);
|
||||
|
@ -959,7 +961,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
|
|||
/*
|
||||
* We don't want to do full transaction commit from inside sysfs
|
||||
*/
|
||||
btrfs_set_pending(fs_info, COMMIT);
|
||||
set_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
|
||||
wake_up_process(fs_info->transaction_kthread);
|
||||
|
||||
return len;
|
||||
|
@ -1160,16 +1162,16 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
|
|||
|
||||
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
|
||||
if (fs_devices->read_policy == i)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
|
||||
ret += sysfs_emit_at(buf, ret, "%s[%s]",
|
||||
(ret == 0 ? "" : " "),
|
||||
btrfs_read_policy_name[i]);
|
||||
else
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
|
||||
ret += sysfs_emit_at(buf, ret, "%s%s",
|
||||
(ret == 0 ? "" : " "),
|
||||
btrfs_read_policy_name[i]);
|
||||
}
|
||||
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
|
||||
ret += sysfs_emit_at(buf, ret, "\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "../disk-io.h"
|
||||
#include "../qgroup.h"
|
||||
#include "../block-group.h"
|
||||
#include "../fs.h"
|
||||
|
||||
static struct vfsmount *test_mnt = NULL;
|
||||
|
||||
|
@ -101,7 +102,7 @@ struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
|
|||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
|
||||
extent_io_tree_init(NULL, &dev->alloc_state, 0);
|
||||
INIT_LIST_HEAD(&dev->dev_list);
|
||||
list_add(&dev->dev_list, &fs_info->fs_devices->devices);
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "../ctree.h"
|
||||
#include "../extent_io.h"
|
||||
#include "../disk-io.h"
|
||||
#include "../accessors.h"
|
||||
|
||||
static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
|
|
|
@ -132,7 +132,7 @@ static int test_find_delalloc(u32 sectorsize)
|
|||
* Passing NULL as we don't have fs_info but tracepoints are not used
|
||||
* at this point
|
||||
*/
|
||||
extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL);
|
||||
extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST);
|
||||
|
||||
/*
|
||||
* First go through and create and mark all of our pages dirty, we pin
|
||||
|
@ -489,7 +489,7 @@ static int test_find_first_clear_extent_bit(void)
|
|||
|
||||
test_msg("running find_first_clear_extent_bit test");
|
||||
|
||||
extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
|
||||
extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST);
|
||||
|
||||
/* Test correct handling of empty tree */
|
||||
find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "../free-space-tree.h"
|
||||
#include "../transaction.h"
|
||||
#include "../block-group.h"
|
||||
#include "../accessors.h"
|
||||
|
||||
struct free_space_extent {
|
||||
u64 start;
|
||||
|
@ -470,7 +471,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
|
|||
}
|
||||
cache->bitmap_low_thresh = 0;
|
||||
cache->bitmap_high_thresh = (u32)-1;
|
||||
cache->needs_free_space = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &cache->runtime_flags);
|
||||
cache->fs_info = root->fs_info;
|
||||
|
||||
btrfs_init_dummy_trans(&trans, root->fs_info);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "../extent_io.h"
|
||||
#include "../volumes.h"
|
||||
#include "../compression.h"
|
||||
#include "../accessors.h"
|
||||
|
||||
static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
|
||||
u64 ram_bytes, u64 offset, u64 disk_bytenr,
|
||||
|
@ -72,8 +73,8 @@ static void insert_inode_item_key(struct btrfs_root *root)
|
|||
* diagram of how the extents will look though this may not be possible we still
|
||||
* want to make sure everything acts normally (the last number is not inclusive)
|
||||
*
|
||||
* [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291]
|
||||
* [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split]
|
||||
* [0 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291]
|
||||
* [inline][hole but no extent][ hole ][ regular ][regular1 split]
|
||||
*
|
||||
* [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ]
|
||||
* [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written]
|
||||
|
@ -90,19 +91,12 @@ static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
|
|||
u64 disk_bytenr = SZ_1M;
|
||||
u64 offset = 0;
|
||||
|
||||
/* First we want a hole */
|
||||
insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
|
||||
slot);
|
||||
slot++;
|
||||
offset += 5;
|
||||
|
||||
/*
|
||||
* Now we want an inline extent, I don't think this is possible but hey
|
||||
* why not? Also keep in mind if we have an inline extent it counts as
|
||||
* the whole first page. If we were to expand it we would have to cow
|
||||
* and we wouldn't have an inline extent anymore.
|
||||
* Tree-checker has strict limits on inline extents that they can only
|
||||
* exist at file offset 0, thus we can only have one inline file extent
|
||||
* at most.
|
||||
*/
|
||||
insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
|
||||
insert_extent(root, offset, 6, 6, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
|
||||
slot);
|
||||
slot++;
|
||||
offset = sectorsize;
|
||||
|
@ -281,37 +275,25 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
|||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->block_start);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != 0 || em->len != 5) {
|
||||
test_err(
|
||||
"unexpected extent wanted start 0 len 5, got start %llu len %llu",
|
||||
em->start, em->len);
|
||||
goto out;
|
||||
}
|
||||
if (em->flags != 0) {
|
||||
test_err("unexpected flags set, want 0 have %lu", em->flags);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_INLINE) {
|
||||
test_err("expected an inline, got %llu", em->block_start);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->start != offset || em->len != (sectorsize - 5)) {
|
||||
/*
|
||||
* For inline extent, we always round up the em to sectorsize, as
|
||||
* they are either:
|
||||
*
|
||||
* a) a hidden hole
|
||||
* The range will be zeroed at inline extent read time.
|
||||
*
|
||||
* b) a file extent with unaligned bytenr
|
||||
* Tree checker will reject it.
|
||||
*/
|
||||
if (em->start != 0 || em->len != sectorsize) {
|
||||
test_err(
|
||||
"unexpected extent wanted start %llu len 1, got start %llu len %llu",
|
||||
offset, em->start, em->len);
|
||||
"unexpected extent wanted start 0 len %u, got start %llu len %llu",
|
||||
sectorsize, em->start, em->len);
|
||||
goto out;
|
||||
}
|
||||
if (em->flags != 0) {
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include "../disk-io.h"
|
||||
#include "../qgroup.h"
|
||||
#include "../backref.h"
|
||||
#include "../fs.h"
|
||||
#include "../accessors.h"
|
||||
|
||||
static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
|
||||
u64 num_bytes, u64 parent, u64 root_objectid)
|
||||
|
@ -203,6 +205,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
|
|||
static int test_no_shared_qgroup(struct btrfs_root *root,
|
||||
u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
struct btrfs_trans_handle trans;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct ulist *old_roots = NULL;
|
||||
|
@ -218,16 +221,22 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ctx.bytenr = nodesize;
|
||||
ctx.trans = &trans;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
/*
|
||||
* Since the test trans doesn't have the complicated delayed refs,
|
||||
* we can only call btrfs_qgroup_account_extent() directly to test
|
||||
* quota.
|
||||
*/
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
|
||||
BTRFS_FS_TREE_OBJECTID);
|
||||
|
@ -236,12 +245,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
new_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
|
||||
new_roots);
|
||||
|
@ -260,11 +271,13 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = remove_extent_item(root, nodesize, nodesize);
|
||||
if (ret) {
|
||||
|
@ -272,12 +285,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
new_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
|
||||
new_roots);
|
||||
|
@ -302,6 +317,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
|||
static int test_multiple_refs(struct btrfs_root *root,
|
||||
u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
struct btrfs_trans_handle trans;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct ulist *old_roots = NULL;
|
||||
|
@ -322,11 +338,17 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
|
||||
ctx.bytenr = nodesize;
|
||||
ctx.trans = &trans;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
|
||||
BTRFS_FS_TREE_OBJECTID);
|
||||
|
@ -335,12 +357,14 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
new_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
|
||||
new_roots);
|
||||
|
@ -355,11 +379,13 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = add_tree_ref(root, nodesize, nodesize, 0,
|
||||
BTRFS_FIRST_FREE_OBJECTID);
|
||||
|
@ -368,12 +394,14 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
new_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
|
||||
new_roots);
|
||||
|
@ -394,11 +422,13 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
old_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = remove_extent_ref(root, nodesize, nodesize, 0,
|
||||
BTRFS_FIRST_FREE_OBJECTID);
|
||||
|
@ -407,12 +437,14 @@ static int test_multiple_refs(struct btrfs_root *root,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
|
||||
ret = btrfs_find_all_roots(&ctx, false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_err("couldn't find old roots: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
new_roots = ctx.roots;
|
||||
ctx.roots = NULL;
|
||||
|
||||
ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
|
||||
new_roots);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
@ -23,6 +24,18 @@
|
|||
#include "block-group.h"
|
||||
#include "space-info.h"
|
||||
#include "zoned.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "extent-tree.h"
|
||||
#include "root-tree.h"
|
||||
#include "defrag.h"
|
||||
#include "dir-item.h"
|
||||
#include "uuid-tree.h"
|
||||
#include "ioctl.h"
|
||||
#include "relocation.h"
|
||||
#include "scrub.h"
|
||||
|
||||
static struct kmem_cache *btrfs_trans_handle_cachep;
|
||||
|
||||
#define BTRFS_ROOT_TRANS_TAG 0
|
||||
|
||||
|
@ -365,9 +378,9 @@ loop:
|
|||
spin_lock_init(&cur_trans->releasing_ebs_lock);
|
||||
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
||||
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
|
||||
IO_TREE_TRANS_DIRTY_PAGES, NULL);
|
||||
IO_TREE_TRANS_DIRTY_PAGES);
|
||||
extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
|
||||
IO_TREE_FS_PINNED_EXTENTS, NULL);
|
||||
IO_TREE_FS_PINNED_EXTENTS);
|
||||
fs_info->generation++;
|
||||
cur_trans->transid = fs_info->generation;
|
||||
fs_info->running_transaction = cur_trans;
|
||||
|
@ -936,7 +949,7 @@ static bool should_end_transaction(struct btrfs_trans_handle *trans)
|
|||
if (btrfs_check_space_for_delayed_refs(fs_info))
|
||||
return true;
|
||||
|
||||
return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
|
||||
return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 50);
|
||||
}
|
||||
|
||||
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
|
||||
|
@ -1607,10 +1620,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root = pending->root;
|
||||
struct btrfs_root *parent_root;
|
||||
struct btrfs_block_rsv *rsv;
|
||||
struct inode *parent_inode;
|
||||
struct inode *parent_inode = pending->dir;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_dir_item *dir_item;
|
||||
struct dentry *dentry;
|
||||
struct extent_buffer *tmp;
|
||||
struct extent_buffer *old;
|
||||
struct timespec64 cur_time;
|
||||
|
@ -1619,6 +1631,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
u64 index = 0;
|
||||
u64 objectid;
|
||||
u64 root_flags;
|
||||
unsigned int nofs_flags;
|
||||
struct fscrypt_name fname;
|
||||
|
||||
ASSERT(pending->path);
|
||||
path = pending->path;
|
||||
|
@ -1626,9 +1640,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
ASSERT(pending->root_item);
|
||||
new_root_item = pending->root_item;
|
||||
|
||||
/*
|
||||
* We're inside a transaction and must make sure that any potential
|
||||
* allocations with GFP_KERNEL in fscrypt won't recurse back to
|
||||
* filesystem.
|
||||
*/
|
||||
nofs_flags = memalloc_nofs_save();
|
||||
pending->error = fscrypt_setup_filename(parent_inode,
|
||||
&pending->dentry->d_name, 0,
|
||||
&fname);
|
||||
memalloc_nofs_restore(nofs_flags);
|
||||
if (pending->error)
|
||||
goto free_pending;
|
||||
|
||||
pending->error = btrfs_get_free_objectid(tree_root, &objectid);
|
||||
if (pending->error)
|
||||
goto no_free_objectid;
|
||||
goto free_fname;
|
||||
|
||||
/*
|
||||
* Make qgroup to skip current new snapshot's qgroupid, as it is
|
||||
|
@ -1657,8 +1684,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
trace_btrfs_space_reservation(fs_info, "transaction",
|
||||
trans->transid,
|
||||
trans->bytes_reserved, 1);
|
||||
dentry = pending->dentry;
|
||||
parent_inode = pending->dir;
|
||||
parent_root = BTRFS_I(parent_inode)->root;
|
||||
ret = record_root_in_trans(trans, parent_root, 0);
|
||||
if (ret)
|
||||
|
@ -1674,8 +1699,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
/* check if there is a file/dir which has the same name. */
|
||||
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
|
||||
btrfs_ino(BTRFS_I(parent_inode)),
|
||||
dentry->d_name.name,
|
||||
dentry->d_name.len, 0);
|
||||
&fname.disk_name, 0);
|
||||
if (dir_item != NULL && !IS_ERR(dir_item)) {
|
||||
pending->error = -EEXIST;
|
||||
goto dir_item_existed;
|
||||
|
@ -1770,7 +1794,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
ret = btrfs_add_root_ref(trans, objectid,
|
||||
parent_root->root_key.objectid,
|
||||
btrfs_ino(BTRFS_I(parent_inode)), index,
|
||||
dentry->d_name.name, dentry->d_name.len);
|
||||
&fname.disk_name);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
|
@ -1802,9 +1826,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
ret = btrfs_insert_dir_item(trans, dentry->d_name.name,
|
||||
dentry->d_name.len, BTRFS_I(parent_inode),
|
||||
&key, BTRFS_FT_DIR, index);
|
||||
ret = btrfs_insert_dir_item(trans, &fname.disk_name,
|
||||
BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
|
||||
index);
|
||||
/* We have checked the name at the beginning, so it is impossible. */
|
||||
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
|
||||
if (ret) {
|
||||
|
@ -1813,7 +1837,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
|
||||
dentry->d_name.len * 2);
|
||||
fname.disk_name.len * 2);
|
||||
parent_inode->i_mtime = current_time(parent_inode);
|
||||
parent_inode->i_ctime = parent_inode->i_mtime;
|
||||
ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
|
||||
|
@ -1845,7 +1869,9 @@ dir_item_existed:
|
|||
trans->bytes_reserved = 0;
|
||||
clear_skip_qgroup:
|
||||
btrfs_clear_skip_qgroup(trans);
|
||||
no_free_objectid:
|
||||
free_fname:
|
||||
fscrypt_free_filename(&fname);
|
||||
free_pending:
|
||||
kfree(new_root_item);
|
||||
pending->root_item = NULL;
|
||||
btrfs_free_path(path);
|
||||
|
@ -2101,6 +2127,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
|||
ASSERT(refcount_read(&trans->use_count) == 1);
|
||||
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
|
||||
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
|
||||
|
||||
/* Stop the commit early if ->aborted is set */
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
|
@ -2354,12 +2382,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
|||
if (ret)
|
||||
goto unlock_reloc;
|
||||
|
||||
/*
|
||||
* Since the transaction is done, we can apply the pending changes
|
||||
* before the next transaction.
|
||||
*/
|
||||
btrfs_apply_pending_changes(fs_info);
|
||||
|
||||
/* commit_fs_roots gets rid of all the tree log roots, it is now
|
||||
* safe to free the root of tree log roots
|
||||
*/
|
||||
|
@ -2582,21 +2604,17 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
|
|||
return (ret < 0) ? 0 : 1;
|
||||
}
|
||||
|
||||
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
|
||||
int __init btrfs_transaction_init(void)
|
||||
{
|
||||
unsigned long prev;
|
||||
unsigned long bit;
|
||||
|
||||
prev = xchg(&fs_info->pending_changes, 0);
|
||||
if (!prev)
|
||||
return;
|
||||
|
||||
bit = 1 << BTRFS_PENDING_COMMIT;
|
||||
if (prev & bit)
|
||||
btrfs_debug(fs_info, "pending commit done");
|
||||
prev &= ~bit;
|
||||
|
||||
if (prev)
|
||||
btrfs_warn(fs_info,
|
||||
"unknown pending changes left 0x%lx, ignoring", prev);
|
||||
btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
|
||||
sizeof(struct btrfs_trans_handle), 0,
|
||||
SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
|
||||
if (!btrfs_trans_handle_cachep)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cold btrfs_transaction_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(btrfs_trans_handle_cachep);
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "btrfs_inode.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "ctree.h"
|
||||
#include "misc.h"
|
||||
|
||||
enum btrfs_trans_state {
|
||||
TRANS_STATE_RUNNING,
|
||||
|
@ -98,14 +99,15 @@ struct btrfs_transaction {
|
|||
struct list_head releasing_ebs;
|
||||
};
|
||||
|
||||
#define __TRANS_FREEZABLE (1U << 0)
|
||||
|
||||
#define __TRANS_START (1U << 9)
|
||||
#define __TRANS_ATTACH (1U << 10)
|
||||
#define __TRANS_JOIN (1U << 11)
|
||||
#define __TRANS_JOIN_NOLOCK (1U << 12)
|
||||
#define __TRANS_DUMMY (1U << 13)
|
||||
#define __TRANS_JOIN_NOSTART (1U << 14)
|
||||
enum {
|
||||
ENUM_BIT(__TRANS_FREEZABLE),
|
||||
ENUM_BIT(__TRANS_START),
|
||||
ENUM_BIT(__TRANS_ATTACH),
|
||||
ENUM_BIT(__TRANS_JOIN),
|
||||
ENUM_BIT(__TRANS_JOIN_NOLOCK),
|
||||
ENUM_BIT(__TRANS_DUMMY),
|
||||
ENUM_BIT(__TRANS_JOIN_NOSTART),
|
||||
};
|
||||
|
||||
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
|
||||
#define TRANS_ATTACH (__TRANS_ATTACH)
|
||||
|
@ -231,9 +233,11 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
|
|||
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
|
||||
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
|
||||
void btrfs_put_transaction(struct btrfs_transaction *transaction);
|
||||
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
|
||||
|
||||
int __init btrfs_transaction_init(void);
|
||||
void __cold btrfs_transaction_exit(void);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "tree-checker.h"
|
||||
#include "disk-io.h"
|
||||
|
@ -25,6 +26,9 @@
|
|||
#include "volumes.h"
|
||||
#include "misc.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "fs.h"
|
||||
#include "accessors.h"
|
||||
#include "file-item.h"
|
||||
|
||||
/*
|
||||
* Error message should follow the following format:
|
||||
|
@ -528,7 +532,7 @@ static int check_dir_item(struct extent_buffer *leaf,
|
|||
}
|
||||
|
||||
/* dir type check */
|
||||
dir_type = btrfs_dir_type(leaf, di);
|
||||
dir_type = btrfs_dir_ftype(leaf, di);
|
||||
if (unlikely(dir_type >= BTRFS_FT_MAX)) {
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid dir item type, have %u expect [0, %u)",
|
||||
|
@ -1780,10 +1784,10 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
|
|||
|
||||
/* Also check if the item pointer overlaps with btrfs item. */
|
||||
if (unlikely(btrfs_item_ptr_offset(leaf, slot) <
|
||||
btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item))) {
|
||||
btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) {
|
||||
generic_err(leaf, slot,
|
||||
"slot overlaps with its data, item end %lu data start %lu",
|
||||
btrfs_item_nr_offset(slot) +
|
||||
btrfs_item_nr_offset(leaf, slot) +
|
||||
sizeof(struct btrfs_item),
|
||||
btrfs_item_ptr_offset(leaf, slot));
|
||||
return -EUCLEAN;
|
||||
|
|
|
@ -6,8 +6,39 @@
|
|||
#ifndef BTRFS_TREE_CHECKER_H
|
||||
#define BTRFS_TREE_CHECKER_H
|
||||
|
||||
#include "ctree.h"
|
||||
#include "extent_io.h"
|
||||
#include <uapi/linux/btrfs_tree.h>
|
||||
|
||||
struct extent_buffer;
|
||||
struct btrfs_chunk;
|
||||
|
||||
/* All the extra info needed to verify the parentness of a tree block. */
|
||||
struct btrfs_tree_parent_check {
|
||||
/*
|
||||
* The owner check against the tree block.
|
||||
*
|
||||
* Can be 0 to skip the owner check.
|
||||
*/
|
||||
u64 owner_root;
|
||||
|
||||
/*
|
||||
* Expected transid, can be 0 to skip the check, but such skip
|
||||
* should only be utilized for backref walk related code.
|
||||
*/
|
||||
u64 transid;
|
||||
|
||||
/*
|
||||
* The expected first key.
|
||||
*
|
||||
* This check can be skipped if @has_first_key is false, such skip
|
||||
* can happen in cases where we don't have the parent node key,
|
||||
* e.g. reading the tree root, doing backref walk.
|
||||
*/
|
||||
struct btrfs_key first_key;
|
||||
bool has_first_key;
|
||||
|
||||
/* The expected level. Should always be set. */
|
||||
u8 level;
|
||||
};
|
||||
|
||||
/*
|
||||
* Comprehensive leaf checker.
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue