for-5.19-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmKLxJAACgkQxWXV+ddt
WDvC4BAAnSNwZ15FJKe5Y423f6PS6EXjyMuc5t/fW6UumTTbI+tsS+Glkis+JNBf
BiDZSlVQmiK9WoQSJe04epZgHaK8MaCARyZaRaxjDC4Nvfq4DlD9mbAU9D6e7tZY
Mo8M99D8wDW+SB+P8RBpNjwB/oGCMmE3nKC83g+1ObmA0FVRCyQ1Kazf8RzNT1rZ
DiaJoKTvU1/wDN3/1rw5yG+EfW2m9A14gRCihslhFYaDV7jhpuabl8wLT7MftZtE
MtJ6EOOQbgIDjnp5BEIrPmowW/N0tKDT/gorF7cWgLG2R1cbSlKgqSH1Sq7CjFUE
AKj/DwfqZArPLpqMThWklCwy2B9qDEezrQSy7renP/vkeFLbOp8hQuIY5KRzohdG
oDI8ThlQGtCVjbny6NX/BbCnWRAfTz0TquCgag3Xl8NbkRFgFJtkf/cSxzb+3LW1
tFeiUyTVLXVDS1cZLwgcb29Rrtp4bjd5/v3uECQlVD+or5pcAqSMkQgOBlyQJGbE
Xb0nmPRihzQ8D4vINa63WwRyq0+QczVjvBxKj1daas0VEKGd32PIBS/0Qha+EpGl
uFMiHBMSfqyl8QcShFk0cCbcgPMcNc7I6IAbXCE/WhhFG0ytqm9vpmlLqsTrXmHH
z7/Eye/waqgACNEXoA8C4pyYzduQ4i1CeLDOdcsvBU6XQSuicSM=
=lv6P
-----END PGP SIGNATURE-----

Merge tag 'for-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:

 "Features:

   - subpage:
      - support for PAGE_SIZE > 4K (previously only 64K)
      - make it work with raid56

   - repair super block num_devices automatically if it does not match
     the number of device items

   - defrag can convert inline extents to regular extents; up to now
     inline files were skipped, but the setting of mount option
     max_inline could affect the decision logic

   - zoned:
      - minimal accepted zone size is explicitly set to 4MiB
      - make zone reclaim less aggressive and don't reclaim if there
        are enough free zones
      - add per-profile sysfs tunable of the reclaim threshold

   - allow automatic block group reclaim for non-zoned filesystems,
     with sysfs tunables

   - tree-checker: new check, compare extent buffer owner against
     owner rootid

  Performance:

   - avoid blocking on space reservation when doing nowait direct io
     writes (+7% throughput for reads and writes)

   - NOCOW write throughput improvement due to refined locking (+3%)

   - send: reduce pressure to page cache by dropping extent pages right
     after they're processed

  Core:

   - convert all radix trees to xarray

   - add iterators for b-tree node items

   - support printk message index

   - use bulk page allocation for extent buffers

   - switch to bio_alloc API, use on-stack bios where convenient, other
     bio cleanups

   - use rw lock for block groups to favor concurrent reads

   - simplify workqueues, don't allocate high priority threads for all
     normal queues as we need only one

   - refactor scrub, process chunks based on their constraints and
     similarity

   - allocate direct io structures on stack and pass around only
     pointers, avoids allocation and reduces potential error handling

  Fixes:

   - fix count of reserved transaction items for various inode
     operations

   - fix deadlock between concurrent dio writes when low on free data
     space

   - fix a few cases when zones need to be finished

  VFS, iomap:

   - add helper to check if sb write has started (usable for
     assertions)

   - new helper iomap_dio_alloc_bio, export iomap_dio_bio_end_io"

* tag 'for-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (173 commits)
  btrfs: zoned: introduce a minimal zone size 4M and reject mount
  btrfs: allow defrag to convert inline extents to regular extents
  btrfs: add "0x" prefix for unsupported optional features
  btrfs: do not account twice for inode ref when reserving metadata units
  btrfs: zoned: fix comparison of alloc_offset vs meta_write_pointer
  btrfs: send: avoid trashing the page cache
  btrfs: send: keep the current inode open while processing it
  btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  btrfs: move struct btrfs_dio_private to inode.c
  btrfs: remove the disk_bytenr in struct btrfs_dio_private
  btrfs: allocate dio_data on stack
  iomap: add per-iomap_iter private data
  iomap: allow the file system to provide a bio_set for direct I/O
  btrfs: add a btrfs_dio_rw wrapper
  btrfs: zoned: zone finish unused block group
  btrfs: zoned: properly finish block group on metadata write
  btrfs: zoned: finish block group when there are no more allocatable bytes left
  btrfs: zoned: consolidate zone finish functions
  btrfs: zoned: introduce btrfs_zoned_bg_is_full
  btrfs: improve error reporting in lookup_inline_extent_backref
  ...
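One of the "Core" items above, the new b-tree item iterator, appears further down in this diff as the btrfs_for_each_slot() macro and the btrfs_get_next_valid_item() helper in fs/btrfs/ctree.h. The following is only a minimal sketch of how a search loop looks when written against that iterator; the function name count_block_group_items() is invented for illustration, and the caller is assumed to have allocated the path and prepared the starting key:

	/* Illustrative only: walk items from @key and count block group items. */
	static int count_block_group_items(struct btrfs_root *root,
					   struct btrfs_key *key,
					   struct btrfs_path *path)
	{
		struct btrfs_key found_key;
		int iter_ret;
		int count = 0;

		/* iter_ret ends up < 0 on error and 1 when there are no more leaves. */
		btrfs_for_each_slot(root, key, &found_key, path, iter_ret) {
			if (found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
				count++;
		}
		if (iter_ret < 0)
			return iter_ret;
		/* The caller still owns @path and must release it. */
		return count;
	}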
commit bd1b7c1384
fs/btrfs/acl.c

@@ -55,9 +55,8 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
 	return acl;
 }
 
-static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
-			   struct user_namespace *mnt_userns,
-			   struct inode *inode, struct posix_acl *acl, int type)
+int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
+		    struct posix_acl *acl, int type)
 {
 	int ret, size = 0;
 	const char *name;
@@ -123,40 +122,8 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		if (ret)
 			return ret;
 	}
-	ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
+	ret = __btrfs_set_acl(NULL, inode, acl, type);
 	if (ret)
 		inode->i_mode = old_mode;
 	return ret;
 }
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
-		   struct inode *inode, struct inode *dir)
-{
-	struct posix_acl *default_acl, *acl;
-	int ret = 0;
-
-	/* this happens with subvols */
-	if (!dir)
-		return 0;
-
-	ret = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
-	if (ret)
-		return ret;
-
-	if (default_acl) {
-		ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
-				      ACL_TYPE_DEFAULT);
-		posix_acl_release(default_acl);
-	}
-
-	if (acl) {
-		if (!ret)
-			ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
-					      ACL_TYPE_ACCESS);
-		posix_acl_release(acl);
-	}
-
-	if (!default_acl && !acl)
-		cache_no_acl(inode);
-	return ret;
-}
@ -15,13 +15,12 @@
|
|||
enum {
|
||||
WORK_DONE_BIT,
|
||||
WORK_ORDER_DONE_BIT,
|
||||
WORK_HIGH_PRIO_BIT,
|
||||
};
|
||||
|
||||
#define NO_THRESHOLD (-1)
|
||||
#define DFT_THRESHOLD (32)
|
||||
|
||||
struct __btrfs_workqueue {
|
||||
struct btrfs_workqueue {
|
||||
struct workqueue_struct *normal_wq;
|
||||
|
||||
/* File system this workqueue services */
|
||||
|
@ -48,12 +47,7 @@ struct __btrfs_workqueue {
|
|||
spinlock_t thres_lock;
|
||||
};
|
||||
|
||||
struct btrfs_workqueue {
|
||||
struct __btrfs_workqueue *normal;
|
||||
struct __btrfs_workqueue *high;
|
||||
};
|
||||
|
||||
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
|
||||
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq)
|
||||
{
|
||||
return wq->fs_info;
|
||||
}
|
||||
|
@ -66,22 +60,22 @@ struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work)
|
|||
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
|
||||
{
|
||||
/*
|
||||
* We could compare wq->normal->pending with num_online_cpus()
|
||||
* We could compare wq->pending with num_online_cpus()
|
||||
* to support "thresh == NO_THRESHOLD" case, but it requires
|
||||
* moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
|
||||
* postpone it until someone needs the support of that case.
|
||||
*/
|
||||
if (wq->normal->thresh == NO_THRESHOLD)
|
||||
if (wq->thresh == NO_THRESHOLD)
|
||||
return false;
|
||||
|
||||
return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
|
||||
return atomic_read(&wq->pending) > wq->thresh * 2;
|
||||
}
|
||||
|
||||
static struct __btrfs_workqueue *
|
||||
__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
|
||||
unsigned int flags, int limit_active, int thresh)
|
||||
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
|
||||
const char *name, unsigned int flags,
|
||||
int limit_active, int thresh)
|
||||
{
|
||||
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
|
||||
struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
|
||||
|
||||
if (!ret)
|
||||
return NULL;
|
||||
|
@ -105,12 +99,8 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
|
|||
ret->thresh = thresh;
|
||||
}
|
||||
|
||||
if (flags & WQ_HIGHPRI)
|
||||
ret->normal_wq = alloc_workqueue("btrfs-%s-high", flags,
|
||||
ret->current_active, name);
|
||||
else
|
||||
ret->normal_wq = alloc_workqueue("btrfs-%s", flags,
|
||||
ret->current_active, name);
|
||||
ret->normal_wq = alloc_workqueue("btrfs-%s", flags, ret->current_active,
|
||||
name);
|
||||
if (!ret->normal_wq) {
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
|
@ -119,41 +109,7 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
|
|||
INIT_LIST_HEAD(&ret->ordered_list);
|
||||
spin_lock_init(&ret->list_lock);
|
||||
spin_lock_init(&ret->thres_lock);
|
||||
trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void
|
||||
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
|
||||
|
||||
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
|
||||
const char *name,
|
||||
unsigned int flags,
|
||||
int limit_active,
|
||||
int thresh)
|
||||
{
|
||||
struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
|
||||
|
||||
if (!ret)
|
||||
return NULL;
|
||||
|
||||
ret->normal = __btrfs_alloc_workqueue(fs_info, name,
|
||||
flags & ~WQ_HIGHPRI,
|
||||
limit_active, thresh);
|
||||
if (!ret->normal) {
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (flags & WQ_HIGHPRI) {
|
||||
ret->high = __btrfs_alloc_workqueue(fs_info, name, flags,
|
||||
limit_active, thresh);
|
||||
if (!ret->high) {
|
||||
__btrfs_destroy_workqueue(ret->normal);
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
trace_btrfs_workqueue_alloc(ret, name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -162,7 +118,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
|
|||
* This hook WILL be called in IRQ handler context,
|
||||
* so workqueue_set_max_active MUST NOT be called in this hook
|
||||
*/
|
||||
static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
|
||||
static inline void thresh_queue_hook(struct btrfs_workqueue *wq)
|
||||
{
|
||||
if (wq->thresh == NO_THRESHOLD)
|
||||
return;
|
||||
|
@ -174,7 +130,7 @@ static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
|
|||
* This hook is called in kthread content.
|
||||
* So workqueue_set_max_active is called here.
|
||||
*/
|
||||
static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
|
||||
static inline void thresh_exec_hook(struct btrfs_workqueue *wq)
|
||||
{
|
||||
int new_current_active;
|
||||
long pending;
|
||||
|
@ -217,7 +173,7 @@ out:
|
|||
}
|
||||
}
|
||||
|
||||
static void run_ordered_work(struct __btrfs_workqueue *wq,
|
||||
static void run_ordered_work(struct btrfs_workqueue *wq,
|
||||
struct btrfs_work *self)
|
||||
{
|
||||
struct list_head *list = &wq->ordered_list;
|
||||
|
@ -305,7 +261,7 @@ static void btrfs_work_helper(struct work_struct *normal_work)
|
|||
{
|
||||
struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
|
||||
normal_work);
|
||||
struct __btrfs_workqueue *wq;
|
||||
struct btrfs_workqueue *wq = work->wq;
|
||||
int need_order = 0;
|
||||
|
||||
/*
|
||||
|
@ -318,7 +274,6 @@ static void btrfs_work_helper(struct work_struct *normal_work)
|
|||
*/
|
||||
if (work->ordered_func)
|
||||
need_order = 1;
|
||||
wq = work->wq;
|
||||
|
||||
trace_btrfs_work_sched(work);
|
||||
thresh_exec_hook(wq);
|
||||
|
@ -350,8 +305,7 @@ void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
|
|||
work->flags = 0;
|
||||
}
|
||||
|
||||
static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
|
||||
struct btrfs_work *work)
|
||||
void btrfs_queue_work(struct btrfs_workqueue *wq, struct btrfs_work *work)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -366,54 +320,22 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
|
|||
queue_work(wq->normal_wq, &work->normal_work);
|
||||
}
|
||||
|
||||
void btrfs_queue_work(struct btrfs_workqueue *wq,
|
||||
struct btrfs_work *work)
|
||||
{
|
||||
struct __btrfs_workqueue *dest_wq;
|
||||
|
||||
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
|
||||
dest_wq = wq->high;
|
||||
else
|
||||
dest_wq = wq->normal;
|
||||
__btrfs_queue_work(dest_wq, work);
|
||||
}
|
||||
|
||||
static inline void
|
||||
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
|
||||
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
|
||||
{
|
||||
if (!wq)
|
||||
return;
|
||||
destroy_workqueue(wq->normal_wq);
|
||||
trace_btrfs_workqueue_destroy(wq);
|
||||
kfree(wq);
|
||||
}
|
||||
|
||||
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
|
||||
{
|
||||
if (!wq)
|
||||
return;
|
||||
if (wq->high)
|
||||
__btrfs_destroy_workqueue(wq->high);
|
||||
__btrfs_destroy_workqueue(wq->normal);
|
||||
kfree(wq);
|
||||
}
|
||||
|
||||
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active)
|
||||
{
|
||||
if (!wq)
|
||||
return;
|
||||
wq->normal->limit_active = limit_active;
|
||||
if (wq->high)
|
||||
wq->high->limit_active = limit_active;
|
||||
}
|
||||
|
||||
void btrfs_set_work_high_priority(struct btrfs_work *work)
|
||||
{
|
||||
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
|
||||
if (wq)
|
||||
wq->limit_active = limit_active;
|
||||
}
|
||||
|
||||
void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
|
||||
{
|
||||
if (wq->high)
|
||||
flush_workqueue(wq->high->normal_wq);
|
||||
|
||||
flush_workqueue(wq->normal->normal_wq);
|
||||
flush_workqueue(wq->normal_wq);
|
||||
}
|
||||
|
|
|
fs/btrfs/async-thread.h

@@ -11,8 +11,6 @@
 
 struct btrfs_fs_info;
 struct btrfs_workqueue;
-/* Internal use only */
-struct __btrfs_workqueue;
 struct btrfs_work;
 typedef void (*btrfs_func_t)(struct btrfs_work *arg);
 typedef void (*btrfs_work_func_t)(struct work_struct *arg);
@@ -25,7 +23,7 @@ struct btrfs_work {
 	/* Don't touch things below */
 	struct work_struct normal_work;
 	struct list_head ordered_list;
-	struct __btrfs_workqueue *wq;
+	struct btrfs_workqueue *wq;
 	unsigned long flags;
 };
 
@@ -40,9 +38,8 @@ void btrfs_queue_work(struct btrfs_workqueue *wq,
 		      struct btrfs_work *work);
 void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
-void btrfs_set_work_high_priority(struct btrfs_work *work);
 struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
-struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
+struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq);
 bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
 void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
 
@ -168,11 +168,12 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
|
|||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
struct btrfs_block_group *cache;
|
||||
bool leftmost = true;
|
||||
|
||||
ASSERT(block_group->length != 0);
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
p = &info->block_group_cache_tree.rb_node;
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
p = &info->block_group_cache_tree.rb_root.rb_node;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
|
@ -181,20 +182,18 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
|
|||
p = &(*p)->rb_left;
|
||||
} else if (block_group->start > cache->start) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&block_group->cache_node, parent, p);
|
||||
rb_insert_color(&block_group->cache_node,
|
||||
&info->block_group_cache_tree);
|
||||
rb_insert_color_cached(&block_group->cache_node,
|
||||
&info->block_group_cache_tree, leftmost);
|
||||
|
||||
if (info->first_logical_byte > block_group->start)
|
||||
info->first_logical_byte = block_group->start;
|
||||
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -210,8 +209,8 @@ static struct btrfs_block_group *block_group_cache_tree_search(
|
|||
struct rb_node *n;
|
||||
u64 end, start;
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
n = info->block_group_cache_tree.rb_node;
|
||||
read_lock(&info->block_group_cache_lock);
|
||||
n = info->block_group_cache_tree.rb_root.rb_node;
|
||||
|
||||
while (n) {
|
||||
cache = rb_entry(n, struct btrfs_block_group, cache_node);
|
||||
|
@ -233,12 +232,9 @@ static struct btrfs_block_group *block_group_cache_tree_search(
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (ret) {
|
||||
if (ret)
|
||||
btrfs_get_block_group(ret);
|
||||
if (bytenr == 0 && info->first_logical_byte > ret->start)
|
||||
info->first_logical_byte = ret->start;
|
||||
}
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
read_unlock(&info->block_group_cache_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -267,15 +263,15 @@ struct btrfs_block_group *btrfs_next_block_group(
|
|||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
struct rb_node *node;
|
||||
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
read_lock(&fs_info->block_group_cache_lock);
|
||||
|
||||
/* If our block group was removed, we need a full search. */
|
||||
if (RB_EMPTY_NODE(&cache->cache_node)) {
|
||||
const u64 next_bytenr = cache->start + cache->length;
|
||||
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
read_unlock(&fs_info->block_group_cache_lock);
|
||||
btrfs_put_block_group(cache);
|
||||
cache = btrfs_lookup_first_block_group(fs_info, next_bytenr); return cache;
|
||||
return btrfs_lookup_first_block_group(fs_info, next_bytenr);
|
||||
}
|
||||
node = rb_next(&cache->cache_node);
|
||||
btrfs_put_block_group(cache);
|
||||
|
@ -284,46 +280,70 @@ struct btrfs_block_group *btrfs_next_block_group(
|
|||
btrfs_get_block_group(cache);
|
||||
} else
|
||||
cache = NULL;
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
read_unlock(&fs_info->block_group_cache_lock);
|
||||
return cache;
|
||||
}
|
||||
|
||||
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
/**
|
||||
* Check if we can do a NOCOW write for a given extent.
|
||||
*
|
||||
* @fs_info: The filesystem information object.
|
||||
* @bytenr: Logical start address of the extent.
|
||||
*
|
||||
* Check if we can do a NOCOW write for the given extent, and increments the
|
||||
* number of NOCOW writers in the block group that contains the extent, as long
|
||||
* as the block group exists and it's currently not in read-only mode.
|
||||
*
|
||||
* Returns: A non-NULL block group pointer if we can do a NOCOW write, the caller
|
||||
* is responsible for calling btrfs_dec_nocow_writers() later.
|
||||
*
|
||||
* Or NULL if we can not do a NOCOW write
|
||||
*/
|
||||
struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr)
|
||||
{
|
||||
struct btrfs_block_group *bg;
|
||||
bool ret = true;
|
||||
bool can_nocow = true;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
if (!bg)
|
||||
return false;
|
||||
return NULL;
|
||||
|
||||
spin_lock(&bg->lock);
|
||||
if (bg->ro)
|
||||
ret = false;
|
||||
can_nocow = false;
|
||||
else
|
||||
atomic_inc(&bg->nocow_writers);
|
||||
spin_unlock(&bg->lock);
|
||||
|
||||
/* No put on block group, done by btrfs_dec_nocow_writers */
|
||||
if (!ret)
|
||||
if (!can_nocow) {
|
||||
btrfs_put_block_group(bg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
/* No put on block group, done by btrfs_dec_nocow_writers(). */
|
||||
return bg;
|
||||
}
|
||||
|
||||
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
/**
|
||||
* Decrement the number of NOCOW writers in a block group.
|
||||
*
|
||||
* @bg: The block group.
|
||||
*
|
||||
* This is meant to be called after a previous call to btrfs_inc_nocow_writers(),
|
||||
* and on the block group returned by that call. Typically this is called after
|
||||
* creating an ordered extent for a NOCOW write, to prevent races with scrub and
|
||||
* relocation.
|
||||
*
|
||||
* After this call, the caller should not use the block group anymore. If it wants
|
||||
* to use it, then it should get a reference on it before calling this function.
|
||||
*/
|
||||
void btrfs_dec_nocow_writers(struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
ASSERT(bg);
|
||||
if (atomic_dec_and_test(&bg->nocow_writers))
|
||||
wake_up_var(&bg->nocow_writers);
|
||||
/*
|
||||
* Once for our lookup and once for the lookup done by a previous call
|
||||
* to btrfs_inc_nocow_writers()
|
||||
*/
|
||||
btrfs_put_block_group(bg);
|
||||
|
||||
/* For the lookup done by a previous call to btrfs_inc_nocow_writers(). */
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
|
||||
|
@ -772,10 +792,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
|
|||
cache->has_caching_ctl = 1;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
refcount_inc(&caching_ctl->count);
|
||||
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
|
||||
btrfs_get_block_group(cache);
|
||||
|
||||
|
@ -957,17 +977,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
rb_erase(&block_group->cache_node,
|
||||
&fs_info->block_group_cache_tree);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
rb_erase_cached(&block_group->cache_node,
|
||||
&fs_info->block_group_cache_tree);
|
||||
RB_CLEAR_NODE(&block_group->cache_node);
|
||||
|
||||
/* Once for the block groups rbtree */
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
if (fs_info->first_logical_byte == block_group->start)
|
||||
fs_info->first_logical_byte = (u64)-1;
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
|
||||
down_write(&block_group->space_info->groups_sem);
|
||||
/*
|
||||
|
@ -992,7 +1010,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|||
if (block_group->cached == BTRFS_CACHE_STARTED)
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
if (block_group->has_caching_ctl) {
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
if (!caching_ctl) {
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
|
@ -1006,7 +1024,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
if (caching_ctl)
|
||||
list_del_init(&caching_ctl->list);
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
if (caching_ctl) {
|
||||
/* Once for the caching bgs list and once for us. */
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
|
@ -1367,6 +1385,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
|||
goto next;
|
||||
}
|
||||
|
||||
ret = btrfs_zone_finish(block_group);
|
||||
if (ret < 0) {
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
goto next;
|
||||
}
|
||||
|
||||
/*
|
||||
* Want to do this before we do anything else so we can recover
|
||||
* properly if we fail to join the transaction.
|
||||
|
@ -1512,6 +1538,13 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
|
|||
return bg1->used > bg2->used;
|
||||
}
|
||||
|
||||
static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_is_zoned(fs_info))
|
||||
return btrfs_zoned_should_reclaim(fs_info);
|
||||
return true;
|
||||
}
|
||||
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info =
|
||||
|
@ -1522,6 +1555,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
|||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (!btrfs_should_reclaim(fs_info))
|
||||
return;
|
||||
|
||||
sb_start_write(fs_info->sb);
|
||||
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
|
@ -1692,35 +1728,13 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_root *root = btrfs_block_group_root(fs_info);
|
||||
int ret;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
while (1) {
|
||||
slot = path->slots[0];
|
||||
leaf = path->nodes[0];
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret == 0)
|
||||
continue;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
btrfs_for_each_slot(root, key, &found_key, path, ret) {
|
||||
if (found_key.objectid >= key->objectid &&
|
||||
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
|
||||
ret = read_bg_from_eb(fs_info, &found_key, path);
|
||||
break;
|
||||
return read_bg_from_eb(fs_info, &found_key, path);
|
||||
}
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3220,6 +3234,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline bool should_reclaim_block_group(struct btrfs_block_group *bg,
|
||||
u64 bytes_freed)
|
||||
{
|
||||
const struct btrfs_space_info *space_info = bg->space_info;
|
||||
const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
|
||||
const u64 new_val = bg->used;
|
||||
const u64 old_val = new_val + bytes_freed;
|
||||
u64 thresh;
|
||||
|
||||
if (reclaim_thresh == 0)
|
||||
return false;
|
||||
|
||||
thresh = div_factor_fine(bg->length, reclaim_thresh);
|
||||
|
||||
/*
|
||||
* If we were below the threshold before don't reclaim, we are likely a
|
||||
* brand new block group and we don't want to relocate new block groups.
|
||||
*/
|
||||
if (old_val < thresh)
|
||||
return false;
|
||||
if (new_val >= thresh)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, bool alloc)
|
||||
{
|
||||
|
@ -3242,6 +3281,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
|||
spin_unlock(&info->delalloc_root_lock);
|
||||
|
||||
while (total) {
|
||||
bool reclaim;
|
||||
|
||||
cache = btrfs_lookup_block_group(info, bytenr);
|
||||
if (!cache) {
|
||||
ret = -ENOENT;
|
||||
|
@ -3287,6 +3328,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
|||
cache->space_info, num_bytes);
|
||||
cache->space_info->bytes_used -= num_bytes;
|
||||
cache->space_info->disk_used -= num_bytes * factor;
|
||||
|
||||
reclaim = should_reclaim_block_group(cache, num_bytes);
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
|
||||
|
@ -3313,6 +3356,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
|||
if (!alloc && old_val == 0) {
|
||||
if (!btrfs_test_opt(info, DISCARD_ASYNC))
|
||||
btrfs_mark_bg_unused(cache);
|
||||
} else if (!alloc && reclaim) {
|
||||
btrfs_mark_bg_to_reclaim(cache);
|
||||
}
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
|
@ -3957,14 +4002,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
|||
struct btrfs_caching_control *caching_ctl;
|
||||
struct rb_node *n;
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
while (!list_empty(&info->caching_block_groups)) {
|
||||
caching_ctl = list_entry(info->caching_block_groups.next,
|
||||
struct btrfs_caching_control, list);
|
||||
list_del(&caching_ctl->list);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
}
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
|
||||
spin_lock(&info->unused_bgs_lock);
|
||||
while (!list_empty(&info->unused_bgs)) {
|
||||
|
@ -3994,14 +4039,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
|||
}
|
||||
spin_unlock(&info->zone_active_bgs_lock);
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
while ((n = rb_last(&info->block_group_cache_tree.rb_root)) != NULL) {
|
||||
block_group = rb_entry(n, struct btrfs_block_group,
|
||||
cache_node);
|
||||
rb_erase(&block_group->cache_node,
|
||||
&info->block_group_cache_tree);
|
||||
rb_erase_cached(&block_group->cache_node,
|
||||
&info->block_group_cache_tree);
|
||||
RB_CLEAR_NODE(&block_group->cache_node);
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
|
||||
down_write(&block_group->space_info->groups_sem);
|
||||
list_del(&block_group->list);
|
||||
|
@ -4024,9 +4069,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
|||
ASSERT(block_group->swap_extents == 0);
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
}
|
||||
spin_unlock(&info->block_group_cache_lock);
|
||||
write_unlock(&info->block_group_cache_lock);
|
||||
|
||||
btrfs_release_global_block_rsv(info);
|
||||
|
||||
|
|
|
fs/btrfs/block-group.h

@@ -212,6 +212,8 @@ struct btrfs_block_group {
 	u64 meta_write_pointer;
 	struct map_lookup *physical_map;
 	struct list_head active_bg_list;
+	struct work_struct zone_finish_work;
+	struct extent_buffer *last_eb;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -254,8 +256,9 @@ void btrfs_put_block_group(struct btrfs_block_group *cache);
 void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
 					const u64 start);
 void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
-bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
-void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
+struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
+						  u64 bytenr);
+void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
 void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
 void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
 					   u64 num_bytes);
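As the declarations above show, the NOCOW writer accounting changes shape in this pull: btrfs_inc_nocow_writers() now returns the block group (or NULL) instead of a bool, and btrfs_dec_nocow_writers() takes that block group directly instead of repeating the bytenr lookup. A rough sketch of the new pairing follows; do_nocow_write() is an invented caller for illustration and the actual write-path work is elided:

	/* Hypothetical caller, only to show how the new inc/dec pair is used. */
	static int do_nocow_write(struct btrfs_fs_info *fs_info, u64 disk_bytenr)
	{
		struct btrfs_block_group *bg;
		int ret = 0;

		/*
		 * NULL means the block group is missing or read-only; real
		 * callers would fall back to COW, here we just bail out.
		 */
		bg = btrfs_inc_nocow_writers(fs_info, disk_bytenr);
		if (!bg)
			return -EAGAIN;

		/* ... create the ordered extent / submit the NOCOW write ... */

		/* Drops the writer count and the reference taken by the inc call. */
		btrfs_dec_nocow_writers(bg);
		return ret;
	}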
@ -395,31 +395,6 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
|
|||
return true;
|
||||
}
|
||||
|
||||
struct btrfs_dio_private {
|
||||
struct inode *inode;
|
||||
|
||||
/*
|
||||
* Since DIO can use anonymous page, we cannot use page_offset() to
|
||||
* grab the file offset, thus need a dedicated member for file offset.
|
||||
*/
|
||||
u64 file_offset;
|
||||
u64 disk_bytenr;
|
||||
/* Used for bio::bi_size */
|
||||
u32 bytes;
|
||||
|
||||
/*
|
||||
* References to this structure. There is one reference per in-flight
|
||||
* bio plus one while we're still setting up.
|
||||
*/
|
||||
refcount_t refs;
|
||||
|
||||
/* dio_bio came from fs/direct-io.c */
|
||||
struct bio *dio_bio;
|
||||
|
||||
/* Array of checksums */
|
||||
u8 csums[];
|
||||
};
|
||||
|
||||
/*
|
||||
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
|
||||
* separate u32s. These two functions convert between the two representations.
|
||||
|
|
|
@ -1552,21 +1552,18 @@ static int btrfsic_read_block(struct btrfsic_state *state,
|
|||
return -ENOMEM;
|
||||
block_ctx->datav = block_ctx->mem_to_free;
|
||||
block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
block_ctx->pagev[i] = alloc_page(GFP_NOFS);
|
||||
if (!block_ctx->pagev[i])
|
||||
return -1;
|
||||
}
|
||||
ret = btrfs_alloc_page_array(num_pages, block_ctx->pagev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
dev_bytenr = block_ctx->dev_bytenr;
|
||||
for (i = 0; i < num_pages;) {
|
||||
struct bio *bio;
|
||||
unsigned int j;
|
||||
|
||||
bio = btrfs_bio_alloc(num_pages - i);
|
||||
bio_set_dev(bio, block_ctx->dev->bdev);
|
||||
bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
|
||||
REQ_OP_READ, GFP_NOFS);
|
||||
bio->bi_iter.bi_sector = dev_bytenr >> 9;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
|
||||
for (j = i; j < num_pages; j++) {
|
||||
ret = bio_add_page(bio, block_ctx->pagev[j],
|
||||
|
@ -2033,7 +2030,7 @@ continue_loop:
|
|||
|
||||
static void btrfsic_bio_end_io(struct bio *bp)
|
||||
{
|
||||
struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
|
||||
struct btrfsic_block *block = bp->bi_private;
|
||||
int iodone_w_error;
|
||||
|
||||
/* mutex is not held! This is not save if IO is not yet completed
|
||||
|
@ -2635,102 +2632,95 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
|
|||
&btrfsic_dev_state_hashtable);
|
||||
}
|
||||
|
||||
static void __btrfsic_submit_bio(struct bio *bio)
|
||||
static void btrfsic_check_write_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
|
||||
{
|
||||
unsigned int segs = bio_segments(bio);
|
||||
u64 dev_bytenr = 512 * bio->bi_iter.bi_sector;
|
||||
u64 cur_bytenr = dev_bytenr;
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bvec;
|
||||
char **mapped_datav;
|
||||
int bio_is_patched = 0;
|
||||
int i = 0;
|
||||
|
||||
if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info(
|
||||
"submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
|
||||
bio_op(bio), bio->bi_opf, segs,
|
||||
bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev);
|
||||
|
||||
mapped_datav = kmalloc_array(segs, sizeof(*mapped_datav), GFP_NOFS);
|
||||
if (!mapped_datav)
|
||||
return;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
BUG_ON(bvec.bv_len != PAGE_SIZE);
|
||||
mapped_datav[i] = page_address(bvec.bv_page);
|
||||
i++;
|
||||
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
|
||||
pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
|
||||
i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
|
||||
cur_bytenr += bvec.bv_len;
|
||||
}
|
||||
|
||||
btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, segs,
|
||||
bio, &bio_is_patched, bio->bi_opf);
|
||||
kfree(mapped_datav);
|
||||
}
|
||||
|
||||
static void btrfsic_check_flush_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
|
||||
{
|
||||
if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
|
||||
bio_op(bio), bio->bi_opf, bio->bi_bdev);
|
||||
|
||||
if (dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
|
||||
struct btrfsic_block *const block =
|
||||
&dev_state->dummy_block_for_bio_bh_flush;
|
||||
|
||||
block->is_iodone = 0;
|
||||
block->never_written = 0;
|
||||
block->iodone_w_error = 0;
|
||||
block->flush_gen = dev_state->last_flush_gen + 1;
|
||||
block->submit_bio_bh_rw = bio->bi_opf;
|
||||
block->orig_bio_private = bio->bi_private;
|
||||
block->orig_bio_end_io = bio->bi_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bio->bi_private = block;
|
||||
bio->bi_end_io = btrfsic_bio_end_io;
|
||||
} else if ((dev_state->state->print_mask &
|
||||
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
||||
BTRFSIC_PRINT_MASK_VERBOSE))) {
|
||||
pr_info(
|
||||
"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
|
||||
dev_state->bdev);
|
||||
}
|
||||
}
|
||||
|
||||
void btrfsic_check_bio(struct bio *bio)
|
||||
{
|
||||
struct btrfsic_dev_state *dev_state;
|
||||
|
||||
if (!btrfsic_is_initialized)
|
||||
return;
|
||||
|
||||
mutex_lock(&btrfsic_mutex);
|
||||
/* since btrfsic_submit_bio() is also called before
|
||||
* btrfsic_mount(), this might return NULL */
|
||||
/*
|
||||
* We can be called before btrfsic_mount, so there might not be a
|
||||
* dev_state.
|
||||
*/
|
||||
dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev);
|
||||
if (NULL != dev_state &&
|
||||
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
|
||||
int i = 0;
|
||||
u64 dev_bytenr;
|
||||
u64 cur_bytenr;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
int bio_is_patched;
|
||||
char **mapped_datav;
|
||||
unsigned int segs = bio_segments(bio);
|
||||
|
||||
dev_bytenr = 512 * bio->bi_iter.bi_sector;
|
||||
bio_is_patched = 0;
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
|
||||
bio_op(bio), bio->bi_opf, segs,
|
||||
bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev);
|
||||
|
||||
mapped_datav = kmalloc_array(segs,
|
||||
sizeof(*mapped_datav), GFP_NOFS);
|
||||
if (!mapped_datav)
|
||||
goto leave;
|
||||
cur_bytenr = dev_bytenr;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
BUG_ON(bvec.bv_len != PAGE_SIZE);
|
||||
mapped_datav[i] = page_address(bvec.bv_page);
|
||||
i++;
|
||||
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
|
||||
pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
|
||||
i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
|
||||
cur_bytenr += bvec.bv_len;
|
||||
}
|
||||
btrfsic_process_written_block(dev_state, dev_bytenr,
|
||||
mapped_datav, segs,
|
||||
bio, &bio_is_patched,
|
||||
bio->bi_opf);
|
||||
kfree(mapped_datav);
|
||||
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
|
||||
bio_op(bio), bio->bi_opf, bio->bi_bdev);
|
||||
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
|
||||
if ((dev_state->state->print_mask &
|
||||
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
||||
BTRFSIC_PRINT_MASK_VERBOSE)))
|
||||
pr_info(
|
||||
"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
|
||||
dev_state->bdev);
|
||||
} else {
|
||||
struct btrfsic_block *const block =
|
||||
&dev_state->dummy_block_for_bio_bh_flush;
|
||||
|
||||
block->is_iodone = 0;
|
||||
block->never_written = 0;
|
||||
block->iodone_w_error = 0;
|
||||
block->flush_gen = dev_state->last_flush_gen + 1;
|
||||
block->submit_bio_bh_rw = bio->bi_opf;
|
||||
block->orig_bio_private = bio->bi_private;
|
||||
block->orig_bio_end_io = bio->bi_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bio->bi_private = block;
|
||||
bio->bi_end_io = btrfsic_bio_end_io;
|
||||
}
|
||||
mutex_lock(&btrfsic_mutex);
|
||||
if (dev_state) {
|
||||
if (bio_op(bio) == REQ_OP_WRITE && bio_has_data(bio))
|
||||
btrfsic_check_write_bio(bio, dev_state);
|
||||
else if (bio->bi_opf & REQ_PREFLUSH)
|
||||
btrfsic_check_flush_bio(bio, dev_state);
|
||||
}
|
||||
leave:
|
||||
mutex_unlock(&btrfsic_mutex);
|
||||
}
|
||||
|
||||
void btrfsic_submit_bio(struct bio *bio)
|
||||
{
|
||||
__btrfsic_submit_bio(bio);
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
int btrfsic_submit_bio_wait(struct bio *bio)
|
||||
{
|
||||
__btrfsic_submit_bio(bio);
|
||||
return submit_bio_wait(bio);
|
||||
}
|
||||
|
||||
int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
int including_extent_data, u32 print_mask)
|
||||
|
|
|
fs/btrfs/check-integrity.h

@@ -7,11 +7,9 @@
 #define BTRFS_CHECK_INTEGRITY_H
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-void btrfsic_submit_bio(struct bio *bio);
-int btrfsic_submit_bio_wait(struct bio *bio);
+void btrfsic_check_bio(struct bio *bio);
 #else
-#define btrfsic_submit_bio submit_bio
-#define btrfsic_submit_bio_wait submit_bio_wait
+static inline void btrfsic_check_bio(struct bio *bio) { }
 #endif
 
 int btrfsic_mount(struct btrfs_fs_info *fs_info,
@ -425,7 +425,6 @@ out:
|
|||
}
|
||||
|
||||
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
|
||||
struct compressed_bio *cb,
|
||||
struct bio *bio, int mirror_num)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
@ -604,7 +603,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
|||
goto finish_cb;
|
||||
}
|
||||
|
||||
ret = submit_compressed_bio(fs_info, cb, bio, 0);
|
||||
ret = submit_compressed_bio(fs_info, bio, 0);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
bio = NULL;
|
||||
|
@ -802,15 +801,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
|||
* After the compressed pages are read, we copy the bytes into the
|
||||
* bio we were passed and then call the bio end_io calls
|
||||
*/
|
||||
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags)
|
||||
void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct extent_map_tree *em_tree;
|
||||
struct compressed_bio *cb;
|
||||
unsigned int compressed_len;
|
||||
unsigned int nr_pages;
|
||||
unsigned int pg_index;
|
||||
struct bio *comp_bio = NULL;
|
||||
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||
u64 cur_disk_byte = disk_bytenr;
|
||||
|
@ -820,7 +817,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||
u64 em_start;
|
||||
struct extent_map *em;
|
||||
blk_status_t ret;
|
||||
int faili = 0;
|
||||
int ret2;
|
||||
int i;
|
||||
u8 *sums;
|
||||
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
@ -855,32 +853,26 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||
em_len = em->len;
|
||||
em_start = em->start;
|
||||
|
||||
cb->len = bio->bi_iter.bi_size;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->compress_type = em->compress_type;
|
||||
cb->orig_bio = bio;
|
||||
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
|
||||
cb->len = bio->bi_iter.bi_size;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->compress_type = extent_compress_type(bio_flags);
|
||||
cb->orig_bio = bio;
|
||||
|
||||
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
|
||||
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
|
||||
GFP_NOFS);
|
||||
cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
|
||||
cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS);
|
||||
if (!cb->compressed_pages) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto fail1;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
|
||||
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
|
||||
if (!cb->compressed_pages[pg_index]) {
|
||||
faili = pg_index - 1;
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto fail2;
|
||||
}
|
||||
ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
|
||||
if (ret2) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto fail;
|
||||
}
|
||||
faili = nr_pages - 1;
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
add_ra_bio_pages(inode, em_start + em_len, cb);
|
||||
|
||||
|
@ -949,28 +941,29 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||
fs_info->sectorsize);
|
||||
sums += fs_info->csum_size * nr_sectors;
|
||||
|
||||
ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
|
||||
ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
comp_bio = NULL;
|
||||
}
|
||||
}
|
||||
return BLK_STS_OK;
|
||||
return;
|
||||
|
||||
fail2:
|
||||
while (faili >= 0) {
|
||||
__free_page(cb->compressed_pages[faili]);
|
||||
faili--;
|
||||
fail:
|
||||
if (cb->compressed_pages) {
|
||||
for (i = 0; i < cb->nr_pages; i++) {
|
||||
if (cb->compressed_pages[i])
|
||||
__free_page(cb->compressed_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(cb->compressed_pages);
|
||||
fail1:
|
||||
kfree(cb);
|
||||
out:
|
||||
free_extent_map(em);
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
return ret;
|
||||
return;
|
||||
finish_cb:
|
||||
if (comp_bio) {
|
||||
comp_bio->bi_status = ret;
|
||||
|
@ -978,7 +971,7 @@ finish_cb:
|
|||
}
|
||||
/* All bytes of @cb is submitted, endio will free @cb */
|
||||
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||
return ret;
|
||||
return;
|
||||
|
||||
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||
(disk_bytenr + compressed_len - cur_disk_byte) >>
|
||||
|
@ -990,7 +983,6 @@ finish_cb:
|
|||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring @cb, can finish it safely. */
|
||||
finish_compressed_bio_read(cb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
fs/btrfs/compression.h

@@ -102,8 +102,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				  unsigned int write_flags,
 				  struct cgroup_subsys_state *blkcg_css,
 				  bool writeback);
-blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
-					  int mirror_num, unsigned long bio_flags);
+void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+				  int mirror_num);
 
 unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
 
102 fs/btrfs/ctree.c
|
@ -16,6 +16,7 @@
|
|||
#include "volumes.h"
|
||||
#include "qgroup.h"
|
||||
#include "tree-mod-log.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_path *path, int level);
|
||||
|
@ -342,7 +343,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
|||
int level = btrfs_header_level(buf);
|
||||
|
||||
ret = btrfs_set_disk_extent_flags(trans, buf,
|
||||
new_flags, level, 0);
|
||||
new_flags, level);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -1390,12 +1391,13 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
|
|||
}
|
||||
|
||||
/*
|
||||
* helper function for btrfs_search_slot. The goal is to find a block
|
||||
* in cache without setting the path to blocking. If we find the block
|
||||
* we return zero and the path is unchanged.
|
||||
* Helper function for btrfs_search_slot() and other functions that do a search
|
||||
* on a btree. The goal is to find a tree block in the cache (the radix tree at
|
||||
* fs_info->buffer_radix), but if we can't find it, or it's not up to date, read
|
||||
* its pages from disk.
|
||||
*
|
||||
* If we can't find the block, we set the path blocking and do some
|
||||
* reada. -EAGAIN is returned and the search must be repeated.
|
||||
* Returns -EAGAIN, with the path unlocked, if the caller needs to repeat the
|
||||
* whole btree search, starting again from the current root node.
|
||||
*/
|
||||
static int
|
||||
read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
|
@ -1409,12 +1411,21 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
struct btrfs_key first_key;
|
||||
int ret;
|
||||
int parent_level;
|
||||
bool unlock_up;
|
||||
|
||||
unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
|
||||
blocknr = btrfs_node_blockptr(*eb_ret, slot);
|
||||
gen = btrfs_node_ptr_generation(*eb_ret, slot);
|
||||
parent_level = btrfs_header_level(*eb_ret);
|
||||
btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
|
||||
|
||||
/*
|
||||
* If we need to read an extent buffer from disk and we are holding locks
|
||||
* on upper level nodes, we unlock all the upper nodes before reading the
|
||||
* extent buffer, and then return -EAGAIN to the caller as it needs to
|
||||
* restart the search. We don't release the lock on the current level
|
||||
* because we need to walk this node to figure out which blocks to read.
|
||||
*/
|
||||
tmp = find_extent_buffer(fs_info, blocknr);
|
||||
if (tmp) {
|
||||
if (p->reada == READA_FORWARD_ALWAYS)
|
||||
|
@ -1436,30 +1447,38 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
|
||||
/* now we're allowed to do a blocking uptodate check */
|
||||
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
|
||||
ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
|
||||
if (ret) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EIO;
|
||||
}
|
||||
*eb_ret = tmp;
|
||||
return 0;
|
||||
if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
ret = -EAGAIN;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* reduce lock contention at high levels
|
||||
* of the btree by dropping locks before
|
||||
* we read. Don't release the lock on the current
|
||||
* level because we need to walk this node to figure
|
||||
* out which blocks to read.
|
||||
*/
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
if (unlock_up) {
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
ret = -EAGAIN;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (p->reada != READA_NONE)
|
||||
reada_for_search(fs_info, p, level, slot, key->objectid);
|
||||
|
||||
ret = -EAGAIN;
|
||||
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
|
||||
gen, parent_level - 1, &first_key);
|
||||
if (IS_ERR(tmp)) {
|
||||
|
@ -1474,9 +1493,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
|||
*/
|
||||
if (!extent_buffer_uptodate(tmp))
|
||||
ret = -EIO;
|
||||
free_extent_buffer(tmp);
|
||||
|
||||
btrfs_release_path(p);
|
||||
out:
|
||||
if (ret == 0) {
|
||||
*eb_ret = tmp;
|
||||
} else {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2279,6 +2304,43 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for a valid slot for the given path.
|
||||
*
|
||||
* @root: The root node of the tree.
|
||||
* @key: Will contain a valid item if found.
|
||||
* @path: The starting point to validate the slot.
|
||||
*
|
||||
* Return: 0 if the item is valid
|
||||
* 1 if not found
|
||||
* <0 if error.
|
||||
*/
|
||||
int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
while (1) {
|
||||
int ret;
|
||||
const int slot = path->slots[0];
|
||||
const struct extent_buffer *leaf = path->nodes[0];
|
||||
|
||||
/* This is where we start walking the path. */
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
/*
|
||||
* If we've reached the last slot in this leaf we need
|
||||
* to go to the next leaf and reset the path.
|
||||
*/
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret)
|
||||
return ret;
|
||||
continue;
|
||||
}
|
||||
/* Store the found, valid item in @key. */
|
||||
btrfs_item_key_to_cpu(leaf, key, slot);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* adjust the pointers going up the tree, starting at level
|
||||
* making sure the right key of each node is points to 'key'.
|
||||
|
|
165 fs/btrfs/ctree.h
|
@ -675,13 +675,13 @@ struct btrfs_fs_info {
|
|||
rwlock_t global_root_lock;
|
||||
struct rb_root global_root_tree;
|
||||
|
||||
spinlock_t fs_roots_radix_lock;
|
||||
struct radix_tree_root fs_roots_radix;
|
||||
/* The xarray that holds all the FS roots */
|
||||
spinlock_t fs_roots_lock;
|
||||
struct xarray fs_roots;
|
||||
|
||||
/* block group cache stuff */
|
||||
spinlock_t block_group_cache_lock;
|
||||
u64 first_logical_byte;
|
||||
struct rb_root block_group_cache_tree;
|
||||
rwlock_t block_group_cache_lock;
|
||||
struct rb_root_cached block_group_cache_tree;
|
||||
|
||||
/* keep track of unallocated space */
|
||||
atomic64_t free_chunk_space;
|
||||
|
@ -848,12 +848,13 @@ struct btrfs_fs_info {
|
|||
* two
|
||||
*/
|
||||
struct btrfs_workqueue *workers;
|
||||
struct btrfs_workqueue *hipri_workers;
|
||||
struct btrfs_workqueue *delalloc_workers;
|
||||
struct btrfs_workqueue *flush_workers;
|
||||
struct btrfs_workqueue *endio_workers;
|
||||
struct btrfs_workqueue *endio_meta_workers;
|
||||
struct btrfs_workqueue *endio_raid56_workers;
|
||||
struct btrfs_workqueue *rmw_workers;
|
||||
struct workqueue_struct *rmw_workers;
|
||||
struct btrfs_workqueue *endio_meta_write_workers;
|
||||
struct btrfs_workqueue *endio_write_workers;
|
||||
struct btrfs_workqueue *endio_freespace_worker;
|
||||
|
@ -946,9 +947,9 @@ struct btrfs_fs_info {
|
|||
* running.
|
||||
*/
|
||||
refcount_t scrub_workers_refcnt;
|
||||
struct btrfs_workqueue *scrub_workers;
|
||||
struct btrfs_workqueue *scrub_wr_completion_workers;
|
||||
struct btrfs_workqueue *scrub_parity_workers;
|
||||
struct workqueue_struct *scrub_workers;
|
||||
struct workqueue_struct *scrub_wr_completion_workers;
|
||||
struct workqueue_struct *scrub_parity_workers;
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
struct btrfs_discard_ctl discard_ctl;
|
||||
|
@ -994,10 +995,10 @@ struct btrfs_fs_info {
|
|||
|
||||
struct btrfs_delayed_root *delayed_root;
|
||||
|
||||
/* Extent buffer radix tree */
|
||||
/* Extent buffer xarray */
|
||||
spinlock_t buffer_lock;
|
||||
/* Entries are eb->start / sectorsize */
|
||||
struct radix_tree_root buffer_radix;
|
||||
struct xarray extent_buffers;
|
||||
|
||||
/* next backup root to be overwritten */
|
||||
int backup_root_index;
|
||||
|
@ -1045,10 +1046,7 @@ struct btrfs_fs_info {
|
|||
* Zone size > 0 when in ZONED mode, otherwise it's used for a check
|
||||
* if the mode is enabled
|
||||
*/
|
||||
union {
|
||||
u64 zone_size;
|
||||
u64 zoned;
|
||||
};
|
||||
u64 zone_size;
|
||||
|
||||
struct mutex zoned_meta_io_lock;
|
||||
spinlock_t treelog_bg_lock;
|
||||
|
@ -1121,7 +1119,8 @@ enum {
|
|||
*/
|
||||
BTRFS_ROOT_SHAREABLE,
|
||||
BTRFS_ROOT_TRACK_DIRTY,
|
||||
BTRFS_ROOT_IN_RADIX,
|
||||
/* The root is tracked in fs_info::fs_roots */
|
||||
BTRFS_ROOT_REGISTERED,
|
||||
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
|
||||
BTRFS_ROOT_DEFRAG_RUNNING,
|
||||
BTRFS_ROOT_FORCE_COW,
|
||||
|
@ -1225,10 +1224,10 @@ struct btrfs_root {
|
|||
struct rb_root inode_tree;
|
||||
|
||||
/*
|
||||
* radix tree that keeps track of delayed nodes of every inode,
|
||||
* protected by inode_lock
|
||||
* Xarray that keeps track of delayed nodes of every inode, protected
|
||||
* by inode_lock
|
||||
*/
|
||||
struct radix_tree_root delayed_nodes_tree;
|
||||
struct xarray delayed_nodes;
|
||||
/*
|
||||
* right now this just gets used so that a root has its own devid
|
||||
* for stat. It may be used for more later
|
||||
|
@ -2784,7 +2783,8 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
|
|||
u64 bytenr, u64 num_bytes);
|
||||
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root,
|
||||
u64 objectid, u64 offset, u64 bytenr, bool strict);
|
||||
u64 objectid, u64 offset, u64 bytenr, bool strict,
|
||||
struct btrfs_path *path);
|
||||
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 parent, u64 root_objectid,
|
||||
|
@ -2811,8 +2811,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
|||
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct extent_buffer *buf, int full_backref);
|
||||
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *eb, u64 flags,
|
||||
int level, int is_data);
|
||||
struct extent_buffer *eb, u64 flags, int level);
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
|
||||
|
||||
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
|
@@ -2892,7 +2891,7 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);

int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
-				    u64 disk_num_bytes);
+				    u64 disk_num_bytes, bool noflush);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end);
@@ -3039,6 +3038,35 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
			   struct btrfs_path *path);

+int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
+			      struct btrfs_path *path);
+
+/*
+ * Search in @root for a given @key, and store the slot found in @found_key.
+ *
+ * @root:	The root node of the tree.
+ * @key:	The key we are looking for.
+ * @found_key:	Will hold the found item.
+ * @path:	Holds the current slot/leaf.
+ * @iter_ret:	Contains the value returned from btrfs_search_slot or
+ *		btrfs_get_next_valid_item, whichever was executed last.
+ *
+ * The @iter_ret is an output variable that will contain the return value of
+ * btrfs_search_slot, if it encountered an error, or the value returned from
+ * btrfs_get_next_valid_item otherwise. That return value can be 0, if a valid
+ * slot was found, 1 if there were no more leaves, and <0 if there was an error.
+ *
+ * It's recommended to use a separate variable for iter_ret and then use it to
+ * set the function return value so there's no confusion of the 0/1/errno
+ * values stemming from btrfs_search_slot.
+ */
+#define btrfs_for_each_slot(root, key, found_key, path, iter_ret)		\
+	for (iter_ret = btrfs_search_slot(NULL, (root), (key), (path), 0, 0);	\
+		(iter_ret) >= 0 &&						\
+		(iter_ret = btrfs_get_next_valid_item((root), (found_key), (path))) == 0; \
+		(path)->slots[0]++						\
+	)
+
static inline int btrfs_next_old_item(struct btrfs_root *root,
				      struct btrfs_path *p, u64 time_seq)
{
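The iterator introduced above is what the later conversions in this series (mark_block_group_to_copy(), btrfs_search_dir_index_item()) switch to. As a rough sketch of how a caller is expected to use it -- hypothetical code, only btrfs_for_each_slot() and the btrfs types are from the patch:

/* Hypothetical example: visit every DIR_INDEX item of @dirid in @root. */
static int example_walk_dir_index(struct btrfs_root *root, u64 dirid,
				  struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	int iter_ret = 0;
	int ret = 0;

	key.objectid = dirid;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;

	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
		if (found_key.objectid != dirid ||
		    found_key.type != BTRFS_DIR_INDEX_KEY)
			break;
		/* Inspect path->nodes[0] / path->slots[0] here. */
	}
	/* iter_ret is < 0 on error and 1 once there are no more leaves. */
	if (iter_ret < 0)
		ret = iter_ret;
	return ret;
}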
@@ -3190,7 +3218,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);

/* file-item.c */
-struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
@@ -3224,8 +3251,8 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 btrfs_file_extent_end(const struct btrfs_path *path);

/* inode.c */
-blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-				   int mirror_num, unsigned long bio_flags);
+void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
+			   int mirror_num, enum btrfs_compression_type compress_type);
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
				    u32 bio_offset, struct page *page,
				    u64 start, u64 end);
@ -3255,10 +3282,28 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
|
|||
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
unsigned int extra_bits,
|
||||
struct extent_state **cached_state);
|
||||
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *new_root,
|
||||
struct btrfs_root *parent_root,
|
||||
struct user_namespace *mnt_userns);
|
||||
struct btrfs_new_inode_args {
|
||||
/* Input */
|
||||
struct inode *dir;
|
||||
struct dentry *dentry;
|
||||
struct inode *inode;
|
||||
bool orphan;
|
||||
bool subvol;
|
||||
|
||||
/*
|
||||
* Output from btrfs_new_inode_prepare(), input to
|
||||
* btrfs_create_new_inode().
|
||||
*/
|
||||
struct posix_acl *default_acl;
|
||||
struct posix_acl *acl;
|
||||
};
|
||||
int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
|
||||
unsigned int *trans_num_items);
|
||||
int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_new_inode_args *args);
|
||||
void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
|
||||
struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
|
||||
struct inode *dir);
|
||||
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
|
||||
unsigned *bits);
|
||||
void btrfs_clear_delalloc_extent(struct inode *inode,
|
||||
|
@ -3269,7 +3314,6 @@ void btrfs_split_delalloc_extent(struct inode *inode,
|
|||
struct extent_state *orig, u64 split);
|
||||
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
|
||||
int btrfs_readpage(struct file *file, struct page *page);
|
||||
void btrfs_evict_inode(struct inode *inode);
|
||||
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
|
||||
struct inode *btrfs_alloc_inode(struct super_block *sb);
|
||||
|
@ -3314,9 +3358,9 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
|||
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
extern const struct iomap_ops btrfs_dio_iomap_ops;
|
||||
extern const struct iomap_dio_ops btrfs_dio_ops;
|
||||
|
||||
/* Inode locking type flags, by default the exclusive lock is taken */
|
||||
#define BTRFS_ILOCK_SHARED (1U << 0)
|
||||
|
@ -3328,6 +3372,7 @@ void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags);
|
|||
void btrfs_update_inode_bytes(struct btrfs_inode *inode,
|
||||
const u64 add_bytes,
|
||||
const u64 del_bytes);
|
||||
void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
|
||||
/* ioctl.c */
|
||||
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
@ -3403,11 +3448,29 @@ void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
|||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
#ifdef CONFIG_PRINTK_INDEX
|
||||
|
||||
#define btrfs_printk(fs_info, fmt, args...) \
|
||||
do { \
|
||||
printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt); \
|
||||
_btrfs_printk(fs_info, fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
__printf(2, 3)
|
||||
__cold
|
||||
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
|
||||
void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
|
||||
|
||||
#elif defined(CONFIG_PRINTK)
|
||||
|
||||
#define btrfs_printk(fs_info, fmt, args...) \
|
||||
_btrfs_printk(fs_info, fmt, ##args)
|
||||
|
||||
__printf(2, 3)
|
||||
__cold
|
||||
void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
|
||||
|
||||
#else
|
||||
|
||||
#define btrfs_printk(fs_info, fmt, args...) \
|
||||
btrfs_no_printk(fs_info, fmt, ##args)
|
||||
#endif
|
||||
|
@ -3658,12 +3721,25 @@ do { \
|
|||
__LINE__, (errno)); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_PRINTK_INDEX
|
||||
|
||||
#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \
|
||||
do { \
|
||||
__btrfs_handle_fs_error((fs_info), __func__, __LINE__, \
|
||||
(errno), fmt, ##args); \
|
||||
do { \
|
||||
printk_index_subsys_emit( \
|
||||
"BTRFS: error (device %s%s) in %s:%d: errno=%d %s", \
|
||||
KERN_CRIT, fmt); \
|
||||
__btrfs_handle_fs_error((fs_info), __func__, __LINE__, \
|
||||
(errno), fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \
|
||||
__btrfs_handle_fs_error((fs_info), __func__, __LINE__, \
|
||||
(errno), fmt, ##args)
|
||||
|
||||
#endif
|
||||
|
||||
#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
|
||||
&(fs_info)->fs_state)))
|
||||
#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
|
||||
|
@ -3816,15 +3892,16 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
|
|||
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
|
||||
int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
|
||||
struct posix_acl *acl, int type);
|
||||
int btrfs_init_acl(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct inode *dir);
|
||||
int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
struct posix_acl *acl, int type);
|
||||
#else
|
||||
#define btrfs_get_acl NULL
|
||||
#define btrfs_set_acl NULL
|
||||
static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct inode *dir)
|
||||
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct posix_acl *acl,
|
||||
int type)
|
||||
{
|
||||
return 0;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -3929,7 +4006,7 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)

static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
-	return fs_info->zoned != 0;
+	return fs_info->zone_size > 0;
}

static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
|
|
|
@ -289,7 +289,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
u64 disk_num_bytes)
|
||||
u64 disk_num_bytes, bool noflush)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
@ -308,7 +308,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
|||
* If we have a transaction open (can happen if we call truncate_block
|
||||
* from truncate), then we need FLUSH_LIMIT so we don't deadlock.
|
||||
*/
|
||||
if (btrfs_is_free_space_inode(inode)) {
|
||||
if (noflush || btrfs_is_free_space_inode(inode)) {
|
||||
flush = BTRFS_RESERVE_NO_FLUSH;
|
||||
} else {
|
||||
if (current->journal_info)
|
||||
|
@ -333,7 +333,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
|||
*/
|
||||
calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
|
||||
&meta_reserve, &qgroup_reserve);
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true,
|
||||
noflush);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, meta_reserve, flush);
|
||||
|
@ -456,7 +457,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
|||
ret = btrfs_check_data_free_space(inode, reserved, start, len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, len, len);
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, len, len, false);
|
||||
if (ret < 0) {
|
||||
btrfs_free_reserved_data_space(inode, *reserved, start, len);
|
||||
extent_changeset_free(*reserved);
|
||||
|
|
|
@ -78,7 +78,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
|||
}
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
|
||||
node = xa_load(&root->delayed_nodes, ino);
|
||||
|
||||
if (node) {
|
||||
if (btrfs_inode->delayed_node) {
|
||||
|
@ -90,9 +90,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
|||
|
||||
/*
|
||||
* It's possible that we're racing into the middle of removing
|
||||
* this node from the radix tree. In this case, the refcount
|
||||
* this node from the xarray. In this case, the refcount
|
||||
* was zero and it should never go back to one. Just return
|
||||
* NULL like it was never in the radix at all; our release
|
||||
* NULL like it was never in the xarray at all; our release
|
||||
* function is in the process of removing it.
|
||||
*
|
||||
* Some implementations of refcount_inc refuse to bump the
|
||||
|
@ -100,7 +100,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
|||
* here, refcount_inc() may decide to just WARN_ONCE() instead
|
||||
* of actually bumping the refcount.
|
||||
*
|
||||
* If this node is properly in the radix, we want to bump the
|
||||
* If this node is properly in the xarray, we want to bump the
|
||||
* refcount twice, once for the inode and once for this get
|
||||
* operation.
|
||||
*/
|
||||
|
@ -128,36 +128,30 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
|
|||
u64 ino = btrfs_ino(btrfs_inode);
|
||||
int ret;
|
||||
|
||||
again:
|
||||
node = btrfs_get_delayed_node(btrfs_inode);
|
||||
if (node)
|
||||
return node;
|
||||
do {
|
||||
node = btrfs_get_delayed_node(btrfs_inode);
|
||||
if (node)
|
||||
return node;
|
||||
|
||||
node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
|
||||
if (!node)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
btrfs_init_delayed_node(node, root, ino);
|
||||
node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
|
||||
if (!node)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
btrfs_init_delayed_node(node, root, ino);
|
||||
|
||||
/* cached in the btrfs inode and can be accessed */
|
||||
refcount_set(&node->refs, 2);
|
||||
/* Cached in the inode and can be accessed */
|
||||
refcount_set(&node->refs, 2);
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret) {
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
|
||||
if (ret == -EEXIST) {
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
radix_tree_preload_end();
|
||||
goto again;
|
||||
}
|
||||
spin_lock(&root->inode_lock);
|
||||
ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS);
|
||||
if (ret) {
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
if (ret != -EBUSY)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
} while (ret);
|
||||
btrfs_inode->delayed_node = node;
|
||||
spin_unlock(&root->inode_lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
return node;
|
||||
}
|
||||
|
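The lookup-or-insert loop above replaces the old radix_tree_preload()/radix_tree_insert() sequence: xa_insert() handles its own allocation and returns -EBUSY when the index is already occupied, so the caller simply retries the lookup. A minimal, self-contained sketch of that pattern (illustrative names, not part of the patch):

#include <linux/xarray.h>
#include <linux/err.h>

static DEFINE_XARRAY(example_nodes);

/* Return the entry stored at @index, inserting @new if the slot is empty. */
static void *example_insert_or_get(unsigned long index, void *new)
{
	void *cur;
	int ret;

	do {
		cur = xa_load(&example_nodes, index);
		if (cur)
			return cur;		/* someone else inserted first */

		ret = xa_insert(&example_nodes, index, new, GFP_NOFS);
		if (ret && ret != -EBUSY)
			return ERR_PTR(ret);	/* e.g. -ENOMEM */
	} while (ret);				/* -EBUSY: slot got filled, retry */

	return new;
}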
@ -276,8 +270,7 @@ static void __btrfs_release_delayed_node(
|
|||
* back up. We can delete it now.
|
||||
*/
|
||||
ASSERT(refcount_read(&delayed_node->refs) == 0);
|
||||
radix_tree_delete(&root->delayed_nodes_tree,
|
||||
delayed_node->inode_id);
|
||||
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, delayed_node);
|
||||
}
|
||||
|
@ -1870,34 +1863,35 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
|
|||
|
||||
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
|
||||
{
|
||||
u64 inode_id = 0;
|
||||
unsigned long index = 0;
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
struct btrfs_delayed_node *delayed_nodes[8];
|
||||
int i, n;
|
||||
|
||||
while (1) {
|
||||
int n = 0;
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
|
||||
(void **)delayed_nodes, inode_id,
|
||||
ARRAY_SIZE(delayed_nodes));
|
||||
if (!n) {
|
||||
if (xa_empty(&root->delayed_nodes)) {
|
||||
spin_unlock(&root->inode_lock);
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
||||
inode_id = delayed_nodes[n - 1]->inode_id + 1;
|
||||
for (i = 0; i < n; i++) {
|
||||
xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) {
|
||||
/*
|
||||
* Don't increase refs in case the node is dead and
|
||||
* about to be removed from the tree in the loop below
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
|
||||
delayed_nodes[i] = NULL;
|
||||
if (refcount_inc_not_zero(&delayed_node->refs)) {
|
||||
delayed_nodes[n] = delayed_node;
|
||||
n++;
|
||||
}
|
||||
if (n >= ARRAY_SIZE(delayed_nodes))
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
spin_unlock(&root->inode_lock);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (!delayed_nodes[i])
|
||||
continue;
|
||||
for (int i = 0; i < n; i++) {
|
||||
__btrfs_kill_delayed_node(delayed_nodes[i]);
|
||||
btrfs_release_delayed_node(delayed_nodes[i]);
|
||||
}
|
||||
|
|
|
@ -930,7 +930,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
|||
is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
|
||||
|
||||
ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
|
||||
BUG_ON(extent_op && extent_op->is_data);
|
||||
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
|
||||
if (!ref)
|
||||
return -ENOMEM;
|
||||
|
@ -1103,8 +1102,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
|||
return -ENOMEM;
|
||||
|
||||
init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
|
||||
BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
|
||||
false);
|
||||
BTRFS_UPDATE_DELAYED_HEAD, false, false);
|
||||
head_ref->extent_op = extent_op;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
|
|
@ -58,7 +58,6 @@ struct btrfs_delayed_extent_op {
|
|||
u8 level;
|
||||
bool update_key;
|
||||
bool update_flags;
|
||||
bool is_data;
|
||||
u64 flags_to_set;
|
||||
};
|
||||
|
||||
|
|
|
@ -474,6 +474,7 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_dev_extent *dev_extent = NULL;
|
||||
struct btrfs_block_group *cache;
|
||||
struct btrfs_trans_handle *trans;
|
||||
int iter_ret = 0;
|
||||
int ret = 0;
|
||||
u64 chunk_offset;
|
||||
|
||||
|
@ -524,29 +525,8 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
|
|||
key.type = BTRFS_DEV_EXTENT_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto free_path;
|
||||
if (ret > 0) {
|
||||
if (path->slots[0] >=
|
||||
btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto free_path;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
goto free_path;
|
||||
}
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
if (found_key.objectid != src_dev->devid)
|
||||
break;
|
||||
|
@ -557,30 +537,23 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
|
|||
if (found_key.offset < key.offset)
|
||||
break;
|
||||
|
||||
dev_extent = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
|
||||
dev_extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
|
||||
|
||||
chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dev_extent);
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
|
||||
if (!cache)
|
||||
goto skip;
|
||||
continue;
|
||||
|
||||
spin_lock(&cache->lock);
|
||||
cache->to_copy = 1;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
|
||||
skip:
|
||||
ret = btrfs_next_item(root, path);
|
||||
if (ret != 0) {
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
|
||||
free_path:
|
||||
btrfs_free_path(path);
|
||||
unlock:
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
|
@ -881,6 +854,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
|||
int scrub_ret)
|
||||
{
|
||||
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
|
||||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||
struct btrfs_device *tgt_device;
|
||||
struct btrfs_device *src_device;
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
|
@ -930,12 +904,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
|||
WARN_ON(ret);
|
||||
|
||||
/* Prevent write_all_supers() during the finishing procedure */
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
/* Prevent new chunks being allocated on the source device */
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
|
||||
if (!list_empty(&src_device->post_commit_list)) {
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
} else {
|
||||
break;
|
||||
|
@ -972,7 +946,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
|||
error:
|
||||
up_write(&dev_replace->rwsem);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
btrfs_rm_dev_replace_blocked(fs_info);
|
||||
if (tgt_device)
|
||||
btrfs_destroy_dev_replace_tgtdev(tgt_device);
|
||||
|
@ -1001,8 +975,8 @@ error:
|
|||
|
||||
btrfs_assign_next_active_device(src_device, tgt_device);
|
||||
|
||||
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
|
||||
fs_info->fs_devices->rw_devices++;
|
||||
list_add(&tgt_device->dev_alloc_list, &fs_devices->alloc_list);
|
||||
fs_devices->rw_devices++;
|
||||
|
||||
up_write(&dev_replace->rwsem);
|
||||
btrfs_rm_dev_replace_blocked(fs_info);
|
||||
|
@ -1025,7 +999,7 @@ error:
|
|||
* belong to this filesystem.
|
||||
*/
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
/* replace the sysfs entry */
|
||||
btrfs_sysfs_remove_device(src_device);
|
||||
|
|
|
@ -325,36 +325,15 @@ btrfs_search_dir_index_item(struct btrfs_root *root,
|
|||
struct btrfs_path *path, u64 dirid,
|
||||
const char *name, int name_len)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key key;
|
||||
u32 nritems;
|
||||
int ret;
|
||||
|
||||
key.objectid = dirid;
|
||||
key.type = BTRFS_DIR_INDEX_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
|
||||
while (1) {
|
||||
if (path->slots[0] >= nritems) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
if (ret > 0)
|
||||
break;
|
||||
leaf = path->nodes[0];
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
btrfs_for_each_slot(root, &key, &key, path, ret) {
|
||||
if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
|
||||
break;
|
||||
|
||||
|
@ -362,10 +341,12 @@ btrfs_search_dir_index_item(struct btrfs_root *root,
|
|||
name, name_len);
|
||||
if (di)
|
||||
return di;
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
return NULL;
|
||||
/* Adjust return code if the key was not found in the next leaf. */
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/kthread.h>
|
||||
|
@ -374,9 +373,9 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
|||
* @level: expected level, mandatory check
|
||||
* @first_key: expected key of first slot, skip check if NULL
|
||||
*/
|
||||
static int btree_read_extent_buffer_pages(struct extent_buffer *eb,
|
||||
u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key)
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *eb,
|
||||
u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
struct extent_io_tree *io_tree;
|
||||
|
@ -486,7 +485,7 @@ static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
|
|||
uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
|
||||
fs_info->nodesize);
|
||||
|
||||
/* A dirty eb shouldn't disappear from buffer_radix */
|
||||
/* A dirty eb shouldn't disappear from extent_buffers */
|
||||
if (WARN_ON(!eb))
|
||||
return -EUCLEAN;
|
||||
|
||||
|
@ -519,7 +518,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec
|
|||
u64 found_start;
|
||||
struct extent_buffer *eb;
|
||||
|
||||
if (fs_info->sectorsize < PAGE_SIZE)
|
||||
if (fs_info->nodesize < PAGE_SIZE)
|
||||
return csum_dirty_subpage_buffers(fs_info, bvec);
|
||||
|
||||
eb = (struct extent_buffer *)page->private;
|
||||
|
@ -704,7 +703,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
|||
|
||||
ASSERT(page->private);
|
||||
|
||||
if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
|
||||
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
|
||||
return validate_subpage_buffer(page, start, end, mirror);
|
||||
|
||||
eb = (struct extent_buffer *)page->private;
|
||||
|
@ -850,8 +849,7 @@ static void run_one_async_free(struct btrfs_work *work)
|
|||
}
|
||||
|
||||
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags,
|
||||
u64 dio_file_offset,
|
||||
int mirror_num, u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
|
@ -874,9 +872,9 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
|||
async->status = 0;
|
||||
|
||||
if (op_is_sync(bio->bi_opf))
|
||||
btrfs_set_work_high_priority(&async->work);
|
||||
|
||||
btrfs_queue_work(fs_info->workers, &async->work);
|
||||
btrfs_queue_work(fs_info->hipri_workers, &async->work);
|
||||
else
|
||||
btrfs_queue_work(fs_info->workers, &async->work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -920,8 +918,7 @@ static bool should_async_write(struct btrfs_fs_info *fs_info,
|
|||
return true;
|
||||
}
|
||||
|
||||
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags)
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
blk_status_t ret;
|
||||
|
@ -933,31 +930,25 @@ blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
|||
*/
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
BTRFS_WQ_ENDIO_METADATA);
|
||||
if (ret)
|
||||
goto out_w_error;
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
if (!ret)
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (ret)
|
||||
goto out_w_error;
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
if (!ret)
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
} else {
|
||||
/*
|
||||
* kthread helpers are used to submit writes so that
|
||||
* checksumming can happen in parallel across all CPUs
|
||||
*/
|
||||
ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
|
||||
0, btree_submit_bio_start);
|
||||
btree_submit_bio_start);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out_w_error;
|
||||
return 0;
|
||||
|
||||
out_w_error:
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
return ret;
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MIGRATION
|
||||
|
@ -1118,12 +1109,15 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||
if (IS_ERR(buf))
|
||||
return buf;
|
||||
|
||||
ret = btree_read_extent_buffer_pages(buf, parent_transid,
|
||||
level, first_key);
|
||||
ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key);
|
||||
if (ret) {
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
if (btrfs_check_eb_owner(buf, owner_root)) {
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
return buf;
|
||||
|
||||
}
|
||||
|
@ -1164,7 +1158,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
|||
root->nr_delalloc_inodes = 0;
|
||||
root->nr_ordered_extents = 0;
|
||||
root->inode_tree = RB_ROOT;
|
||||
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
|
||||
xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
|
||||
|
||||
btrfs_init_root_block_rsv(root);
|
||||
|
||||
|
@ -1216,9 +1210,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
|||
btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
INIT_LIST_HEAD(&root->leak_list);
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1563,6 +1557,23 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
|
|||
ret = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* For real fs, and not log/reloc trees, root owner must
|
||||
* match its root node owner
|
||||
*/
|
||||
if (!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state) &&
|
||||
root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
|
||||
root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
|
||||
root->root_key.objectid != btrfs_header_owner(root->node)) {
|
||||
btrfs_crit(fs_info,
|
||||
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
|
||||
root->root_key.objectid, root->node->start,
|
||||
btrfs_header_owner(root->node),
|
||||
root->root_key.objectid);
|
||||
ret = -EUCLEAN;
|
||||
goto fail;
|
||||
}
|
||||
root->commit_root = btrfs_root_node(root);
|
||||
return root;
|
||||
fail:
|
||||
|
@ -1648,12 +1659,11 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
struct btrfs_root *root;
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
root = radix_tree_lookup(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root_id);
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
|
||||
if (root)
|
||||
root = btrfs_grab_root(root);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
return root;
|
||||
}
|
||||
|
||||
|
@ -1695,20 +1705,14 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
int ret;
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
ret = radix_tree_insert(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
root);
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
|
||||
root, GFP_NOFS);
|
||||
if (ret == 0) {
|
||||
btrfs_grab_root(root);
|
||||
set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
|
||||
set_bit(BTRFS_ROOT_REGISTERED, &root->state);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
radix_tree_preload_end();
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1964,7 +1968,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
|
|||
|
||||
static int cleaner_kthread(void *arg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
|
||||
struct btrfs_fs_info *fs_info = arg;
|
||||
int again;
|
||||
|
||||
while (1) {
|
||||
|
@ -2266,10 +2270,12 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
|||
{
|
||||
btrfs_destroy_workqueue(fs_info->fixup_workers);
|
||||
btrfs_destroy_workqueue(fs_info->delalloc_workers);
|
||||
btrfs_destroy_workqueue(fs_info->hipri_workers);
|
||||
btrfs_destroy_workqueue(fs_info->workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
|
||||
btrfs_destroy_workqueue(fs_info->rmw_workers);
|
||||
if (fs_info->rmw_workers)
|
||||
destroy_workqueue(fs_info->rmw_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_write_workers);
|
||||
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
|
||||
btrfs_destroy_workqueue(fs_info->delayed_workers);
|
||||
|
@ -2336,9 +2342,9 @@ void btrfs_put_root(struct btrfs_root *root)
|
|||
btrfs_drew_lock_destroy(&root->snapshot_lock);
|
||||
free_root_extent_buffers(root);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
spin_lock(&root->fs_info->fs_roots_radix_lock);
|
||||
spin_lock(&root->fs_info->fs_roots_lock);
|
||||
list_del_init(&root->leak_list);
|
||||
spin_unlock(&root->fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&root->fs_info->fs_roots_lock);
|
||||
#endif
|
||||
kfree(root);
|
||||
}
|
||||
|
@ -2346,28 +2352,21 @@ void btrfs_put_root(struct btrfs_root *root)
|
|||
|
||||
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_root *gang[8];
|
||||
int i;
|
||||
struct btrfs_root *root;
|
||||
unsigned long index = 0;
|
||||
|
||||
while (!list_empty(&fs_info->dead_roots)) {
|
||||
gang[0] = list_entry(fs_info->dead_roots.next,
|
||||
struct btrfs_root, root_list);
|
||||
list_del(&gang[0]->root_list);
|
||||
root = list_entry(fs_info->dead_roots.next,
|
||||
struct btrfs_root, root_list);
|
||||
list_del(&root->root_list);
|
||||
|
||||
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
|
||||
btrfs_drop_and_free_fs_root(fs_info, gang[0]);
|
||||
btrfs_put_root(gang[0]);
|
||||
if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
|
||||
btrfs_drop_and_free_fs_root(fs_info, root);
|
||||
btrfs_put_root(root);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, 0,
|
||||
ARRAY_SIZE(gang));
|
||||
if (!ret)
|
||||
break;
|
||||
for (i = 0; i < ret; i++)
|
||||
btrfs_drop_and_free_fs_root(fs_info, gang[i]);
|
||||
xa_for_each(&fs_info->fs_roots, index, root) {
|
||||
btrfs_drop_and_free_fs_root(fs_info, root);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2444,7 +2443,9 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
|||
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
|
||||
|
||||
fs_info->workers =
|
||||
btrfs_alloc_workqueue(fs_info, "worker",
|
||||
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
|
||||
fs_info->hipri_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "worker-high",
|
||||
flags | WQ_HIGHPRI, max_active, 16);
|
||||
|
||||
fs_info->delalloc_workers =
|
||||
|
@ -2476,8 +2477,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
|||
fs_info->endio_raid56_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
|
||||
max_active, 4);
|
||||
fs_info->rmw_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
|
||||
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
|
||||
fs_info->endio_write_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "endio-write", flags,
|
||||
max_active, 2);
|
||||
|
@ -2492,8 +2492,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
|
|||
fs_info->discard_ctl.discard_workers =
|
||||
alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
|
||||
|
||||
if (!(fs_info->workers && fs_info->delalloc_workers &&
|
||||
fs_info->flush_workers &&
|
||||
if (!(fs_info->workers && fs_info->hipri_workers &&
|
||||
fs_info->delalloc_workers && fs_info->flush_workers &&
|
||||
fs_info->endio_workers && fs_info->endio_meta_workers &&
|
||||
fs_info->endio_meta_write_workers &&
|
||||
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
|
||||
|
@ -2815,12 +2815,14 @@ static int validate_super(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
|
||||
/*
|
||||
* For 4K page size, we only support 4K sector size.
|
||||
* For 64K page size, we support 64K and 4K sector sizes.
|
||||
* We only support at most two sectorsizes: 4K and PAGE_SIZE.
|
||||
*
|
||||
* We can support 16K sectorsize with 64K page size without problem,
|
||||
* but such sectorsize/pagesize combination doesn't make much sense.
|
||||
* 4K will be our future standard, PAGE_SIZE is supported from the very
|
||||
* beginning.
|
||||
*/
|
||||
if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) ||
|
||||
(PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K &&
|
||||
sectorsize != SZ_64K))) {
|
||||
if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) {
|
||||
btrfs_err(fs_info,
|
||||
"sectorsize %llu not yet supported for page size %lu",
|
||||
sectorsize, PAGE_SIZE);
|
||||
|
@ -3132,8 +3134,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
|||
|
||||
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
|
||||
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
|
||||
xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
|
||||
xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
|
||||
INIT_LIST_HEAD(&fs_info->trans_list);
|
||||
INIT_LIST_HEAD(&fs_info->dead_roots);
|
||||
INIT_LIST_HEAD(&fs_info->delayed_iputs);
|
||||
|
@ -3141,7 +3143,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
|||
INIT_LIST_HEAD(&fs_info->caching_block_groups);
|
||||
spin_lock_init(&fs_info->delalloc_root_lock);
|
||||
spin_lock_init(&fs_info->trans_lock);
|
||||
spin_lock_init(&fs_info->fs_roots_radix_lock);
|
||||
spin_lock_init(&fs_info->fs_roots_lock);
|
||||
spin_lock_init(&fs_info->delayed_iput_lock);
|
||||
spin_lock_init(&fs_info->defrag_inodes_lock);
|
||||
spin_lock_init(&fs_info->super_lock);
|
||||
|
@ -3209,9 +3211,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
|||
btrfs_init_balance(fs_info);
|
||||
btrfs_init_async_reclaim_work(fs_info);
|
||||
|
||||
spin_lock_init(&fs_info->block_group_cache_lock);
|
||||
fs_info->block_group_cache_tree = RB_ROOT;
|
||||
fs_info->first_logical_byte = (u64)-1;
|
||||
rwlock_init(&fs_info->block_group_cache_lock);
|
||||
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
|
||||
|
||||
extent_io_tree_init(fs_info, &fs_info->excluded_extents,
|
||||
IO_TREE_FS_EXCLUDED_EXTENTS, NULL);
|
||||
|
@ -3295,7 +3296,7 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
|
|||
|
||||
static int btrfs_uuid_rescan_kthread(void *data)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
|
||||
struct btrfs_fs_info *fs_info = data;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
|
@ -3373,7 +3374,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
|
|||
/*
|
||||
* btrfs_find_orphan_roots() is responsible for finding all the dead
|
||||
* roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
|
||||
* them into the fs_info->fs_roots_radix tree. This must be done before
|
||||
* them into the fs_info->fs_roots. This must be done before
|
||||
* calling btrfs_orphan_cleanup() on the tree root. If we don't do it
|
||||
* first, then btrfs_orphan_cleanup() will delete a dead root's orphan
|
||||
* item before the root's tree is deleted - this means that if we unmount
|
||||
|
@ -3611,7 +3612,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
|||
~BTRFS_FEATURE_INCOMPAT_SUPP;
|
||||
if (features) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount because of unsupported optional features (%llx)",
|
||||
"cannot mount because of unsupported optional features (0x%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
|
@ -3649,7 +3650,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
|||
~BTRFS_FEATURE_COMPAT_RO_SUPP;
|
||||
if (!sb_rdonly(sb) && features) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount read-write because of unsupported optional features (%llx)",
|
||||
"cannot mount read-write because of unsupported optional features (0x%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
|
@ -3672,14 +3673,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
|||
btrfs_warn(fs_info,
|
||||
"read-write for sector size %u with page size %lu is experimental",
|
||||
sectorsize, PAGE_SIZE);
|
||||
if (btrfs_super_incompat_flags(fs_info->super_copy) &
|
||||
BTRFS_FEATURE_INCOMPAT_RAID56) {
|
||||
btrfs_err(fs_info,
|
||||
"RAID56 is not yet supported for sector size %u with page size %lu",
|
||||
sectorsize, PAGE_SIZE);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
|
||||
if (!subpage_info)
|
||||
goto fail_alloc;
|
||||
|
@ -4157,7 +4150,8 @@ static int write_dev_supers(struct btrfs_device *device,
|
|||
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
|
||||
bio->bi_opf |= REQ_FUA;
|
||||
|
||||
btrfsic_submit_bio(bio);
|
||||
btrfsic_check_bio(bio);
|
||||
submit_bio(bio);
|
||||
|
||||
if (btrfs_advance_sb_log(device, i))
|
||||
errors++;
|
||||
|
@ -4271,7 +4265,8 @@ static void write_dev_flush(struct btrfs_device *device)
|
|||
init_completion(&device->flush_wait);
|
||||
bio->bi_private = &device->flush_wait;
|
||||
|
||||
btrfsic_submit_bio(bio);
|
||||
btrfsic_check_bio(bio);
|
||||
submit_bio(bio);
|
||||
set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
|
||||
}
|
||||
|
||||
|
@ -4504,12 +4499,11 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
bool drop_ref = false;
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
radix_tree_delete(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid);
|
||||
if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
|
||||
if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
|
||||
drop_ref = true;
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
ASSERT(root->log_root == NULL);
|
||||
|
@ -4525,50 +4519,48 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|||
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u64 root_objectid = 0;
|
||||
struct btrfs_root *gang[8];
|
||||
int i = 0;
|
||||
struct btrfs_root *roots[8];
|
||||
unsigned long index = 0;
|
||||
int i;
|
||||
int err = 0;
|
||||
unsigned int ret = 0;
|
||||
int grabbed;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, root_objectid,
|
||||
ARRAY_SIZE(gang));
|
||||
if (!ret) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
break;
|
||||
}
|
||||
root_objectid = gang[ret - 1]->root_key.objectid + 1;
|
||||
struct btrfs_root *root;
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
/* Avoid to grab roots in dead_roots */
|
||||
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
|
||||
gang[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
/* grab all the search result for later use */
|
||||
gang[i] = btrfs_grab_root(gang[i]);
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
return err;
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (!gang[i])
|
||||
continue;
|
||||
root_objectid = gang[i]->root_key.objectid;
|
||||
err = btrfs_orphan_cleanup(gang[i]);
|
||||
if (err)
|
||||
grabbed = 0;
|
||||
xa_for_each_start(&fs_info->fs_roots, index, root, index) {
|
||||
/* Avoid grabbing roots in dead_roots */
|
||||
if (btrfs_root_refs(&root->root_item) > 0)
|
||||
roots[grabbed++] = btrfs_grab_root(root);
|
||||
if (grabbed >= ARRAY_SIZE(roots))
|
||||
break;
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
root_objectid++;
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
|
||||
for (i = 0; i < grabbed; i++) {
|
||||
if (!roots[i])
|
||||
continue;
|
||||
index = roots[i]->root_key.objectid;
|
||||
err = btrfs_orphan_cleanup(roots[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
btrfs_put_root(roots[i]);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
/* release the uncleaned roots due to error */
|
||||
for (; i < ret; i++) {
|
||||
if (gang[i])
|
||||
btrfs_put_root(gang[i]);
|
||||
out:
|
||||
/* Release the roots that remain uncleaned due to error */
|
||||
for (; i < grabbed; i++) {
|
||||
if (roots[i])
|
||||
btrfs_put_root(roots[i]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
@ -4863,13 +4855,6 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
|
|||
__btrfs_btree_balance_dirty(fs_info, 0);
|
||||
}
|
||||
|
||||
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key)
|
||||
{
|
||||
return btree_read_extent_buffer_pages(buf, parent_transid,
|
||||
level, first_key);
|
||||
}
|
||||
|
||||
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/* cleanup FS via transaction */
|
||||
|
@ -4885,31 +4870,28 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
|
|||
|
||||
static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *gang[8];
|
||||
u64 root_objectid = 0;
|
||||
int ret;
|
||||
unsigned long index = 0;
|
||||
int grabbed = 0;
|
||||
struct btrfs_root *roots[8];
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, root_objectid,
|
||||
ARRAY_SIZE(gang))) != 0) {
|
||||
int i;
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
|
||||
ULONG_MAX, 8, XA_PRESENT))) {
|
||||
for (int i = 0; i < grabbed; i++)
|
||||
roots[i] = btrfs_grab_root(roots[i]);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
|
||||
for (i = 0; i < ret; i++)
|
||||
gang[i] = btrfs_grab_root(gang[i]);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (!gang[i])
|
||||
for (int i = 0; i < grabbed; i++) {
|
||||
if (!roots[i])
|
||||
continue;
|
||||
root_objectid = gang[i]->root_key.objectid;
|
||||
btrfs_free_log(NULL, gang[i]);
|
||||
btrfs_put_root(gang[i]);
|
||||
index = roots[i]->root_key.objectid;
|
||||
btrfs_free_log(NULL, roots[i]);
|
||||
btrfs_put_root(roots[i]);
|
||||
}
|
||||
root_objectid++;
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
index++;
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
btrfs_free_log_root_tree(NULL, fs_info);
|
||||
}
|
||||
|
||||
|
|
|
@ -87,8 +87,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
|||
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
||||
struct page *page, u64 start, u64 end,
|
||||
int mirror);
|
||||
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
#endif
|
||||
|
@ -120,13 +119,12 @@ void btrfs_put_root(struct btrfs_root *root);
|
|||
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
||||
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
|
||||
int level, struct btrfs_key *first_key);
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata);
|
||||
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags,
|
||||
u64 dio_file_offset,
|
||||
int mirror_num, u64 dio_file_offset,
|
||||
extent_submit_bio_start_t *submit_bio_start);
|
||||
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
|
||||
int mirror_num);
|
||||
|
|
|
@ -895,7 +895,13 @@ again:
|
|||
err = -ENOENT;
|
||||
while (1) {
|
||||
if (ptr >= end) {
|
||||
WARN_ON(ptr > end);
|
||||
if (ptr > end) {
|
||||
err = -EUCLEAN;
|
||||
btrfs_print_leaf(path->nodes[0]);
|
||||
btrfs_crit(fs_info,
|
||||
"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
|
||||
path->slots[0], root_objectid, owner, offset, parent);
|
||||
}
|
||||
break;
|
||||
}
|
||||
iref = (struct btrfs_extent_inline_ref *)ptr;
|
||||
|
@ -1577,12 +1583,12 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
|||
u32 item_size;
|
||||
int ret;
|
||||
int err = 0;
|
||||
int metadata = !extent_op->is_data;
|
||||
int metadata = 1;
|
||||
|
||||
if (TRANS_ABORTED(trans))
|
||||
return 0;
|
||||
|
||||
if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
|
||||
if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
|
||||
metadata = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
|
@ -2180,7 +2186,7 @@ out:
|
|||
|
||||
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *eb, u64 flags,
|
||||
int level, int is_data)
|
||||
int level)
|
||||
{
|
||||
struct btrfs_delayed_extent_op *extent_op;
|
||||
int ret;
|
||||
|
@ -2192,7 +2198,6 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
|||
extent_op->flags_to_set = flags;
|
||||
extent_op->update_flags = true;
|
||||
extent_op->update_key = false;
|
||||
extent_op->is_data = is_data ? true : false;
|
||||
extent_op->level = level;
|
||||
|
||||
ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
|
||||
|
@ -2357,15 +2362,10 @@ out:
|
|||
}
|
||||
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
|
||||
u64 bytenr, bool strict)
|
||||
u64 bytenr, bool strict, struct btrfs_path *path)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
do {
|
||||
ret = check_committed_ref(root, path, objectid,
|
||||
offset, bytenr, strict);
|
||||
|
@ -2376,7 +2376,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
|
|||
} while (ret == -EAGAIN);
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
btrfs_release_path(path);
|
||||
if (btrfs_is_data_reloc_root(root))
|
||||
WARN_ON(ret > 0);
|
||||
return ret;
|
||||
|
@ -2497,24 +2497,21 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
|
||||
static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
u64 bytenr;
|
||||
struct rb_node *leftmost;
|
||||
u64 bytenr = 0;
|
||||
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
bytenr = fs_info->first_logical_byte;
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
read_lock(&fs_info->block_group_cache_lock);
|
||||
/* Get the block group with the lowest logical start address. */
|
||||
leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
|
||||
if (leftmost) {
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
if (bytenr < (u64)-1)
|
||||
return bytenr;
|
||||
|
||||
cache = btrfs_lookup_first_block_group(fs_info, search_start);
|
||||
if (!cache)
|
||||
return 0;
|
||||
|
||||
bytenr = cache->start;
|
||||
btrfs_put_block_group(cache);
|
||||
bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
|
||||
bytenr = bg->start;
|
||||
}
|
||||
read_unlock(&fs_info->block_group_cache_lock);
|
||||
|
||||
return bytenr;
|
||||
}
|
||||
|
@ -3803,8 +3800,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
|||
|
||||
/* Check RO and no space case before trying to activate it */
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->ro ||
|
||||
block_group->alloc_offset == block_group->zone_capacity) {
|
||||
if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
|
||||
ret = 1;
|
||||
/*
|
||||
* May need to clear fs_info->{treelog,data_reloc}_bg.
|
||||
|
@ -4272,7 +4268,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
|||
return ret;
|
||||
|
||||
ffe_ctl->search_start = max(ffe_ctl->search_start,
|
||||
first_logical_byte(fs_info, 0));
|
||||
first_logical_byte(fs_info));
|
||||
ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
|
||||
if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
|
||||
block_group = btrfs_lookup_block_group(fs_info,
|
||||
|
@ -4959,7 +4955,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
|||
extent_op->flags_to_set = flags;
|
||||
extent_op->update_key = skinny_metadata ? false : true;
|
||||
extent_op->update_flags = true;
|
||||
extent_op->is_data = false;
|
||||
extent_op->level = level;
|
||||
|
||||
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
|
||||
|
@ -5144,7 +5139,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
|
|||
ret = btrfs_dec_ref(trans, root, eb, 0);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
ret = btrfs_set_disk_extent_flags(trans, eb, flag,
|
||||
btrfs_header_level(eb), 0);
|
||||
btrfs_header_level(eb));
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
wc->flags[level] |= flag;
|
||||
}
|
||||
|
@ -5818,7 +5813,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
|||
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
|
||||
btrfs_qgroup_free_meta_all_pertrans(root);
|
||||
|
||||
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
|
||||
if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
|
||||
btrfs_add_dropped_root(trans, root);
|
||||
else
|
||||
btrfs_put_root(root);
|
||||
|
|
File diff suppressed because it is too large
|
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
@@ -7,15 +7,9 @@
 #include <linux/refcount.h>
 #include <linux/fiemap.h>
 #include <linux/btrfs_tree.h>
 #include "compression.h"
 #include "ulist.h"
 
-/*
- * flags for bio submission. The high bits indicate the compression
- * type for this bio
- */
-#define EXTENT_BIO_COMPRESSED 1
-#define EXTENT_BIO_FLAG_SHIFT 16
-
 enum {
 	EXTENT_BUFFER_UPTODATE,
 	EXTENT_BUFFER_DIRTY,
@@ -32,7 +26,6 @@ enum {
 	/* write IO error */
 	EXTENT_BUFFER_WRITE_ERR,
-	EXTENT_BUFFER_NO_CHECK,
 	EXTENT_BUFFER_ZONE_FINISH,
 };
 
 /* these are flags for __process_pages_contig */
@@ -71,9 +64,9 @@ struct btrfs_fs_info;
 struct io_failure_record;
 struct extent_io_tree;
 
-typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
-					 int mirror_num,
-					 unsigned long bio_flags);
+typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+				 int mirror_num,
+				 enum btrfs_compression_type compress_type);
 
 typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode,
 		struct bio *bio, u64 dio_file_offset);
@@ -102,17 +95,6 @@ struct extent_buffer {
 #endif
 };
 
-/*
- * Structure to record info about the bio being assembled, and other info like
- * how many bytes are there before stripe/ordered extent boundary.
- */
-struct btrfs_bio_ctrl {
-	struct bio *bio;
-	unsigned long bio_flags;
-	u32 len_to_stripe_boundary;
-	u32 len_to_oe_boundary;
-};
-
 /*
  * Structure to record how many bytes and which ranges are set/cleared
@@ -158,17 +140,6 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
 	kfree(changeset);
 }
 
-static inline void extent_set_compress_type(unsigned long *bio_flags,
-					    int compress_type)
-{
-	*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
-}
-
-static inline int extent_compress_type(unsigned long bio_flags)
-{
-	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
-}
-
 struct extent_map_tree;
 
 typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
@@ -178,11 +149,7 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
 int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 
-int __must_check submit_one_bio(struct bio *bio, int mirror_num,
-				unsigned long bio_flags);
 int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		      struct btrfs_bio_ctrl *bio_ctrl,
 		      unsigned int read_flags, u64 *prev_em_start);
-int btrfs_readpage(struct file *file, struct page *page);
-int extent_write_full_page(struct page *page, struct writeback_control *wbc);
 int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
 int extent_writepages(struct address_space *mapping,
@@ -277,8 +244,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 				  struct page *locked_page,
 				  u32 bits_to_clear, unsigned long page_ops);
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
 struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
-struct bio *btrfs_bio_clone(struct bio *bio);
+struct bio *btrfs_bio_clone(struct block_device *bdev, struct bio *bio);
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
 
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
@@ -297,7 +266,7 @@ struct io_failure_record {
 	u64 start;
 	u64 len;
 	u64 logical;
-	unsigned long bio_flags;
+	enum btrfs_compression_type compress_type;
 	int this_mirror;
 	int failed_mirror;
 };
 fs/btrfs/file.c | 286 lines changed
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
@@ -1460,8 +1460,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 	return ret;
 }
 
-static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
-			   size_t *write_bytes, bool nowait)
+/*
+ * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
+ *
+ * @pos:         File offset.
+ * @write_bytes: The length to write, will be updated to the nocow writeable
+ *               range.
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks.
+ *
+ * Return:
+ * > 0          If we can nocow, and updates @write_bytes.
+ * 0            If we can't do a nocow write.
+ * -EAGAIN      If we can't do a nocow write because snapshoting of the inode's
+ *              root is in progress.
+ * < 0          If an error happened.
+ *
+ * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0.
+ */
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+			   size_t *write_bytes)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_root *root = inode->root;
@@ -1472,7 +1491,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 	if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
 		return 0;
 
-	if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
+	if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
 		return -EAGAIN;
 
 	lockstart = round_down(pos, fs_info->sectorsize);
@@ -1480,71 +1499,21 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 			   fs_info->sectorsize) - 1;
 	num_bytes = lockend - lockstart + 1;
 
-	if (nowait) {
-		struct btrfs_ordered_extent *ordered;
-
-		if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
-			return -EAGAIN;
-
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     num_bytes);
-		if (ordered) {
-			btrfs_put_ordered_extent(ordered);
-			ret = -EAGAIN;
-			goto out_unlock;
-		}
-	} else {
-		btrfs_lock_and_flush_ordered_range(inode, lockstart,
-						   lockend, NULL);
-	}
-
+	btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL);
 	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
 			       NULL, NULL, NULL, false);
 	if (ret <= 0) {
 		ret = 0;
-		if (!nowait)
-			btrfs_drew_write_unlock(&root->snapshot_lock);
+		btrfs_drew_write_unlock(&root->snapshot_lock);
 	} else {
 		*write_bytes = min_t(size_t, *write_bytes ,
 				     num_bytes - pos + lockstart);
 	}
-out_unlock:
 	unlock_extent(&inode->io_tree, lockstart, lockend);
 
 	return ret;
 }
 
-static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
-			      size_t *write_bytes)
-{
-	return check_can_nocow(inode, pos, write_bytes, true);
-}
-
-/*
- * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
- *
- * @pos:	 File offset
- * @write_bytes: The length to write, will be updated to the nocow writeable
- *		 range
- *
- * This function will flush ordered extents in the range to ensure proper
- * nocow checks.
- *
- * Return:
- * >0		and update @write_bytes if we can do nocow write
- *  0		if we can't do nocow write
- * -EAGAIN	if we can't get the needed lock or there are ordered extents
- *		for * (nowait == true) case
- * <0		if other error happened
- *
- * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
- */
-int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
-			   size_t *write_bytes)
-{
-	return check_can_nocow(inode, pos, write_bytes, false);
-}
-
 void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
 {
 	btrfs_drew_write_unlock(&inode->root->snapshot_lock);
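Not part of this series, just an illustrative sketch of the contract spelled out in the new kernel-doc above: a caller takes the lock, treats a 0 or negative return as "fall back to COW or propagate the error", and pairs a positive return with btrfs_check_nocow_unlock(). The helper name try_nocow_write() is made up for the example and the fragment only builds inside the kernel tree.

/* Illustrative only: pairing btrfs_check_nocow_lock()/_unlock() in a caller. */
static ssize_t try_nocow_write(struct btrfs_inode *inode, loff_t pos, size_t count)
{
	size_t write_bytes = count;
	int ret;

	ret = btrfs_check_nocow_lock(inode, pos, &write_bytes);
	if (ret <= 0)
		return ret;	/* 0: must COW; -EAGAIN: snapshot in progress; <0: error */

	/*
	 * NOCOW is allowed for the first @write_bytes bytes, which may be less
	 * than @count; the actual write would go here, then the lock is dropped.
	 */
	btrfs_check_nocow_unlock(inode);
	return write_bytes;
}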
@@ -1579,20 +1548,15 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
 	loff_t oldsize;
 	loff_t start_pos;
 
-	if (iocb->ki_flags & IOCB_NOWAIT) {
-		size_t nocow_bytes = count;
-
-		/* We will allocate space in case nodatacow is not set, so bail */
-		if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) <= 0)
-			return -EAGAIN;
-		/*
-		 * There are holes in the range or parts of the range that must
-		 * be COWed (shared extents, RO block groups, etc), so just bail
-		 * out.
-		 */
-		if (nocow_bytes < count)
-			return -EAGAIN;
-	}
+	/*
+	 * Quickly bail out on NOWAIT writes if we don't have the nodatacow or
+	 * prealloc flags, as without those flags we always have to COW. We will
+	 * later check if we can really COW into the target range (using
+	 * can_nocow_extent() at btrfs_get_blocks_direct_write()).
+	 */
+	if ((iocb->ki_flags & IOCB_NOWAIT) &&
+	    !(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+		return -EAGAIN;
 
 	current->backing_dev_info = inode_to_bdi(inode);
 	ret = file_remove_privs(file);
@@ -1720,7 +1684,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 		WARN_ON(reserve_bytes == 0);
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
 						      reserve_bytes,
-						      reserve_bytes);
+						      reserve_bytes, false);
 		if (ret) {
 			if (!only_release_metadata)
 				btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -1965,8 +1929,7 @@ relock:
 	 */
 again:
 	from->nofault = true;
-	err = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-			   IOMAP_DIO_PARTIAL, written);
+	err = btrfs_dio_rw(iocb, from, written);
 	from->nofault = false;
 
 	/* No increment (+=) because iomap returns a cumulative value. */
@ -2570,10 +2533,10 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_punch_hole_lock_range(struct inode *inode,
|
||||
const u64 lockstart,
|
||||
const u64 lockend,
|
||||
struct extent_state **cached_state)
|
||||
static void btrfs_punch_hole_lock_range(struct inode *inode,
|
||||
const u64 lockstart,
|
||||
const u64 lockend,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
/*
|
||||
* For subpage case, if the range is not at page boundary, we could
|
||||
|
@ -2587,40 +2550,29 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
|
|||
const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
|
||||
|
||||
while (1) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
int ret;
|
||||
|
||||
truncate_pagecache_range(inode, lockstart, lockend);
|
||||
|
||||
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
cached_state);
|
||||
ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
|
||||
lockend);
|
||||
|
||||
/*
|
||||
* We need to make sure we have no ordered extents in this range
|
||||
* and nobody raced in and read a page in this range, if we did
|
||||
* we need to try again.
|
||||
* We can't have ordered extents in the range, nor dirty/writeback
|
||||
* pages, because we have locked the inode's VFS lock in exclusive
|
||||
* mode, we have locked the inode's i_mmap_lock in exclusive mode,
|
||||
* we have flushed all delalloc in the range and we have waited
|
||||
* for any ordered extents in the range to complete.
|
||||
* We can race with anyone reading pages from this range, so after
|
||||
* locking the range check if we have pages in the range, and if
|
||||
* we do, unlock the range and retry.
|
||||
*/
|
||||
if ((!ordered ||
|
||||
(ordered->file_offset + ordered->num_bytes <= lockstart ||
|
||||
ordered->file_offset > lockend)) &&
|
||||
!filemap_range_has_page(inode->i_mapping,
|
||||
page_lockstart, page_lockend)) {
|
||||
if (ordered)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
|
||||
page_lockend))
|
||||
break;
|
||||
}
|
||||
if (ordered)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
|
||||
lockend, cached_state);
|
||||
ret = btrfs_wait_ordered_range(inode, lockstart,
|
||||
lockend - lockstart + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
|
||||
btrfs_assert_inode_range_clean(BTRFS_I(inode), lockstart, lockend);
|
||||
}
|
||||
|
||||
static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
|
||||
|
@ -2976,11 +2928,12 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
|||
bool truncated_block = false;
|
||||
bool updated_inode = false;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, offset, len);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_only_mutex;
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
ino_size = round_up(inode->i_size, fs_info->sectorsize);
|
||||
ret = find_first_non_hole(BTRFS_I(inode), &offset, &len);
|
||||
if (ret < 0)
|
||||
|
@ -3072,10 +3025,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
|||
goto out_only_mutex;
|
||||
}
|
||||
|
||||
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
|
||||
&cached_state);
|
||||
if (ret)
|
||||
goto out_only_mutex;
|
||||
btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
|
@ -3237,8 +3187,6 @@ static int btrfs_zero_range(struct inode *inode,
|
|||
u64 bytes_to_reserve = 0;
|
||||
bool space_reserved = false;
|
||||
|
||||
inode_dio_wait(inode);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
if (IS_ERR(em)) {
|
||||
|
@ -3368,10 +3316,8 @@ reserve_space:
|
|||
if (ret < 0)
|
||||
goto out;
|
||||
space_reserved = true;
|
||||
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
|
||||
&cached_state);
|
||||
if (ret)
|
||||
goto out;
|
||||
btrfs_punch_hole_lock_range(inode, lockstart, lockend,
|
||||
&cached_state);
|
||||
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
|
||||
alloc_start, bytes_to_reserve);
|
||||
if (ret) {
|
||||
|
@ -3417,6 +3363,9 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
u64 alloc_hint = 0;
|
||||
u64 locked_end;
|
||||
u64 actual_end = 0;
|
||||
u64 data_space_needed = 0;
|
||||
u64 data_space_reserved = 0;
|
||||
u64 qgroup_reserved = 0;
|
||||
struct extent_map *em;
|
||||
int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode));
|
||||
int ret;
|
||||
|
@ -3437,18 +3386,6 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
if (mode & FALLOC_FL_PUNCH_HOLE)
|
||||
return btrfs_punch_hole(file, offset, len);
|
||||
|
||||
/*
|
||||
* Only trigger disk allocation, don't trigger qgroup reserve
|
||||
*
|
||||
* For qgroup space, it will be checked later.
|
||||
*/
|
||||
if (!(mode & FALLOC_FL_ZERO_RANGE)) {
|
||||
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
|
||||
alloc_end - alloc_start);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
|
||||
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
|
||||
|
@ -3485,8 +3422,12 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
}
|
||||
|
||||
/*
|
||||
* wait for ordered IO before we have any locks. We'll loop again
|
||||
* below with the locks held.
|
||||
* We have locked the inode at the VFS level (in exclusive mode) and we
|
||||
* have locked the i_mmap_lock lock (in exclusive mode). Now before
|
||||
* locking the file range, flush all dealloc in the range and wait for
|
||||
* all ordered extents in the range to complete. After this we can lock
|
||||
* the file range and, due to the previous locking we did, we know there
|
||||
* can't be more delalloc or ordered extents in the range.
|
||||
*/
|
||||
ret = btrfs_wait_ordered_range(inode, alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
|
@ -3500,38 +3441,10 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
}
|
||||
|
||||
locked_end = alloc_end - 1;
|
||||
while (1) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
|
||||
&cached_state);
|
||||
|
||||
/* the extent lock is ordered inside the running
|
||||
* transaction
|
||||
*/
|
||||
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
|
||||
locked_end, &cached_state);
|
||||
ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
|
||||
locked_end);
|
||||
|
||||
if (ordered &&
|
||||
ordered->file_offset + ordered->num_bytes > alloc_start &&
|
||||
ordered->file_offset < alloc_end) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
|
||||
alloc_start, locked_end,
|
||||
&cached_state);
|
||||
/*
|
||||
* we can't wait on the range with the transaction
|
||||
* running or with the extent lock held
|
||||
*/
|
||||
ret = btrfs_wait_ordered_range(inode, alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
if (ordered)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
break;
|
||||
}
|
||||
}
|
||||
btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
|
||||
|
||||
/* First, check if we exceed the qgroup limit */
|
||||
INIT_LIST_HEAD(&reserve_list);
|
||||
|
@ -3548,48 +3461,64 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
(cur_offset >= inode->i_size &&
|
||||
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
|
||||
ret = add_falloc_range(&reserve_list, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
const u64 range_len = last_byte - cur_offset;
|
||||
|
||||
ret = add_falloc_range(&reserve_list, cur_offset, range_len);
|
||||
if (ret < 0) {
|
||||
free_extent_map(em);
|
||||
break;
|
||||
}
|
||||
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
|
||||
&data_reserved, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
&data_reserved, cur_offset, range_len);
|
||||
if (ret < 0) {
|
||||
cur_offset = last_byte;
|
||||
free_extent_map(em);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Do not need to reserve unwritten extent for this
|
||||
* range, free reserved data space first, otherwise
|
||||
* it'll result in false ENOSPC error.
|
||||
*/
|
||||
btrfs_free_reserved_data_space(BTRFS_I(inode),
|
||||
data_reserved, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
qgroup_reserved += range_len;
|
||||
data_space_needed += range_len;
|
||||
}
|
||||
free_extent_map(em);
|
||||
cur_offset = last_byte;
|
||||
}
|
||||
|
||||
if (!ret && data_space_needed > 0) {
|
||||
/*
|
||||
* We are safe to reserve space here as we can't have delalloc
|
||||
* in the range, see above.
|
||||
*/
|
||||
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
|
||||
data_space_needed);
|
||||
if (!ret)
|
||||
data_space_reserved = data_space_needed;
|
||||
}
|
||||
|
||||
/*
|
||||
* If ret is still 0, means we're OK to fallocate.
|
||||
* Or just cleanup the list and exit.
|
||||
*/
|
||||
list_for_each_entry_safe(range, tmp, &reserve_list, list) {
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
ret = btrfs_prealloc_file_range(inode, mode,
|
||||
range->start,
|
||||
range->len, i_blocksize(inode),
|
||||
offset + len, &alloc_hint);
|
||||
else
|
||||
/*
|
||||
* btrfs_prealloc_file_range() releases space even
|
||||
* if it returns an error.
|
||||
*/
|
||||
data_space_reserved -= range->len;
|
||||
qgroup_reserved -= range->len;
|
||||
} else if (data_space_reserved > 0) {
|
||||
btrfs_free_reserved_data_space(BTRFS_I(inode),
|
||||
data_reserved, range->start,
|
||||
range->len);
|
||||
data_reserved, range->start,
|
||||
range->len);
|
||||
data_space_reserved -= range->len;
|
||||
qgroup_reserved -= range->len;
|
||||
} else if (qgroup_reserved > 0) {
|
||||
btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved,
|
||||
range->start, range->len);
|
||||
qgroup_reserved -= range->len;
|
||||
}
|
||||
list_del(&range->list);
|
||||
kfree(range);
|
||||
}
|
||||
|
@ -3606,10 +3535,6 @@ out_unlock:
|
|||
&cached_state);
|
||||
out:
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
/* Let go of our reservation. */
|
||||
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
|
||||
btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
|
||||
cur_offset, alloc_end - cur_offset);
|
||||
extent_changeset_free(data_reserved);
|
||||
return ret;
|
||||
}
|
||||
|
@@ -3767,8 +3692,7 @@ again:
 	 */
 	pagefault_disable();
 	to->nofault = true;
-	ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-			   IOMAP_DIO_PARTIAL, read);
+	ret = btrfs_dio_rw(iocb, to, read);
 	to->nofault = false;
 	pagefault_enable();
 
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
@@ -2630,16 +2630,19 @@ out:
 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 					u64 bytenr, u64 size, bool used)
 {
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct btrfs_space_info *sinfo = block_group->space_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	u64 offset = bytenr - block_group->start;
 	u64 to_free, to_unusable;
-	const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
+	int bg_reclaim_threshold = 0;
 	bool initial = (size == block_group->length);
 	u64 reclaimable_unusable;
 
 	WARN_ON(!initial && offset + size > block_group->zone_capacity);
 
+	if (!initial)
+		bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
+
 	spin_lock(&ctl->tree_lock);
 	if (!used)
 		to_free = size;
@@ -4069,7 +4072,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
 
 	btrfs_info(fs_info, "cleaning free space cache v1");
 
-	node = rb_first(&fs_info->block_group_cache_tree);
+	node = rb_first_cached(&fs_info->block_group_cache_tree);
 	while (node) {
 		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
 		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);

diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
@@ -1178,7 +1178,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
 		goto abort;
 	}
 
-	node = rb_first(&fs_info->block_group_cache_tree);
+	node = rb_first_cached(&fs_info->block_group_cache_tree);
 	while (node) {
 		block_group = rb_entry(node, struct btrfs_block_group,
 				       cache_node);
 fs/btrfs/inode.c | 1872 lines changed (diff suppressed because it is too large)
 fs/btrfs/ioctl.c |  280 lines changed
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
@@ -540,9 +540,35 @@ int __pure btrfs_is_empty_uuid(u8 *uuid)
 	return 1;
 }
 
+/*
+ * Calculate the number of transaction items to reserve for creating a subvolume
+ * or snapshot, not including the inode, directory entries, or parent directory.
+ */
+static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit)
+{
+	/*
+	 * 1 to add root block
+	 * 1 to add root item
+	 * 1 to add root ref
+	 * 1 to add root backref
+	 * 1 to add UUID item
+	 * 1 to add qgroup info
+	 * 1 to add qgroup limit
+	 *
+	 * Ideally the last two would only be accounted if qgroups are enabled,
+	 * but that can change between now and the time we would insert them.
+	 */
+	unsigned int num_items = 7;
+
+	if (inherit) {
+		/* 2 to add qgroup relations for each inherited qgroup */
+		num_items += 2 * inherit->num_qgroups;
+	}
+	return num_items;
+}
+
 static noinline int create_subvol(struct user_namespace *mnt_userns,
 				  struct inode *dir, struct dentry *dentry,
-				  const char *name, int namelen,
 				  struct btrfs_qgroup_inherit *inherit)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
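A worked example of what the new helper reserves, using only the numbers from the comments above and from the snapshot hunk further down (which adds 3 more items for the directory entries and parent inode); the scenario itself is made up:

    base items                 = 7   (root block, root item, root ref, root backref,
                                      UUID item, qgroup info, qgroup limit)
    inherit with N qgroups     = 2 * N
    snapshot path additionally = 3   (dir item, dir index, parent inode update)

    e.g. a snapshot inheriting 2 qgroups reserves 7 + 2*2 + 3 = 14 transaction items.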
@ -555,11 +581,15 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
|
|||
struct btrfs_root *new_root;
|
||||
struct btrfs_block_rsv block_rsv;
|
||||
struct timespec64 cur_time = current_time(dir);
|
||||
struct inode *inode;
|
||||
struct btrfs_new_inode_args new_inode_args = {
|
||||
.dir = dir,
|
||||
.dentry = dentry,
|
||||
.subvol = true,
|
||||
};
|
||||
unsigned int trans_num_items;
|
||||
int ret;
|
||||
dev_t anon_dev = 0;
|
||||
dev_t anon_dev;
|
||||
u64 objectid;
|
||||
u64 index = 0;
|
||||
|
||||
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
|
||||
if (!root_item)
|
||||
|
@ -567,11 +597,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
|
|||
|
||||
ret = btrfs_get_free_objectid(fs_info->tree_root, &objectid);
|
||||
if (ret)
|
||||
goto fail_free;
|
||||
|
||||
ret = get_anon_bdev(&anon_dev);
|
||||
if (ret < 0)
|
||||
goto fail_free;
|
||||
goto out_root_item;
|
||||
|
||||
/*
|
||||
* Don't create subvolume whose level is not zero. Or qgroup will be
|
||||
|
@ -579,36 +605,47 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
|
|||
*/
|
||||
if (btrfs_qgroup_level(objectid)) {
|
||||
ret = -ENOSPC;
|
||||
goto fail_free;
|
||||
goto out_root_item;
|
||||
}
|
||||
|
||||
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
|
||||
/*
|
||||
* The same as the snapshot creation, please see the comment
|
||||
* of create_snapshot().
|
||||
*/
|
||||
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
|
||||
ret = get_anon_bdev(&anon_dev);
|
||||
if (ret < 0)
|
||||
goto out_root_item;
|
||||
|
||||
new_inode_args.inode = btrfs_new_subvol_inode(mnt_userns, dir);
|
||||
if (!new_inode_args.inode) {
|
||||
ret = -ENOMEM;
|
||||
goto out_anon_dev;
|
||||
}
|
||||
ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
|
||||
if (ret)
|
||||
goto fail_free;
|
||||
goto out_inode;
|
||||
trans_num_items += create_subvol_num_items(inherit);
|
||||
|
||||
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
|
||||
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
|
||||
trans_num_items, false);
|
||||
if (ret)
|
||||
goto out_new_inode_args;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
btrfs_subvolume_release_metadata(root, &block_rsv);
|
||||
goto fail_free;
|
||||
goto out_new_inode_args;
|
||||
}
|
||||
trans->block_rsv = &block_rsv;
|
||||
trans->bytes_reserved = block_rsv.size;
|
||||
|
||||
ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
|
||||
if (ret)
|
||||
goto fail;
|
||||
goto out;
|
||||
|
||||
leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
|
||||
BTRFS_NESTING_NORMAL);
|
||||
if (IS_ERR(leaf)) {
|
||||
ret = PTR_ERR(leaf);
|
||||
goto fail;
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
@ -663,75 +700,46 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
|
|||
btrfs_tree_unlock(leaf);
|
||||
btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
|
||||
free_extent_buffer(leaf);
|
||||
goto fail;
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_extent_buffer(leaf);
|
||||
leaf = NULL;
|
||||
|
||||
key.offset = (u64)-1;
|
||||
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
|
||||
if (IS_ERR(new_root)) {
|
||||
free_anon_bdev(anon_dev);
|
||||
ret = PTR_ERR(new_root);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
goto out;
|
||||
}
|
||||
/* Freeing will be done in btrfs_put_root() of new_root */
|
||||
/* anon_dev is owned by new_root now. */
|
||||
anon_dev = 0;
|
||||
BTRFS_I(new_inode_args.inode)->root = new_root;
|
||||
/* ... and new_root is owned by new_inode_args.inode now. */
|
||||
|
||||
ret = btrfs_record_root_in_trans(trans, new_root);
|
||||
if (ret) {
|
||||
btrfs_put_root(new_root);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = btrfs_create_subvol_root(trans, new_root, root, mnt_userns);
|
||||
btrfs_put_root(new_root);
|
||||
if (ret) {
|
||||
/* We potentially lose an unused inode item here */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* insert the directory item
|
||||
*/
|
||||
ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
|
||||
BTRFS_FT_DIR, index);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
|
||||
ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
|
||||
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
|
||||
BTRFS_UUID_KEY_SUBVOL, objectid);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
fail:
|
||||
kfree(root_item);
|
||||
ret = btrfs_create_new_inode(trans, &new_inode_args);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
d_instantiate_new(dentry, new_inode_args.inode);
|
||||
new_inode_args.inode = NULL;
|
||||
|
||||
out:
|
||||
trans->block_rsv = NULL;
|
||||
trans->bytes_reserved = 0;
|
||||
btrfs_subvolume_release_metadata(root, &block_rsv);
|
||||
|
@ -740,18 +748,14 @@ fail:
|
|||
btrfs_end_transaction(trans);
|
||||
else
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
|
||||
if (!ret) {
|
||||
inode = btrfs_lookup_dentry(dir, dentry);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
d_instantiate(dentry, inode);
|
||||
}
|
||||
return ret;
|
||||
|
||||
fail_free:
|
||||
out_new_inode_args:
|
||||
btrfs_new_inode_args_destroy(&new_inode_args);
|
||||
out_inode:
|
||||
iput(new_inode_args.inode);
|
||||
out_anon_dev:
|
||||
if (anon_dev)
|
||||
free_anon_bdev(anon_dev);
|
||||
out_root_item:
|
||||
kfree(root_item);
|
||||
return ret;
|
||||
}
|
||||
|
@ -763,6 +767,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
|||
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
|
||||
struct inode *inode;
|
||||
struct btrfs_pending_snapshot *pending_snapshot;
|
||||
unsigned int trans_num_items;
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
|
@ -800,16 +805,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
|||
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
|
||||
BTRFS_BLOCK_RSV_TEMP);
|
||||
/*
|
||||
* 1 - parent dir inode
|
||||
* 2 - dir entries
|
||||
* 1 - root item
|
||||
* 2 - root ref/backref
|
||||
* 1 - root of snapshot
|
||||
* 1 - UUID item
|
||||
* 1 to add dir item
|
||||
* 1 to add dir index
|
||||
* 1 to update parent inode item
|
||||
*/
|
||||
trans_num_items = create_subvol_num_items(inherit) + 3;
|
||||
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
|
||||
&pending_snapshot->block_rsv, 8,
|
||||
false);
|
||||
&pending_snapshot->block_rsv,
|
||||
trans_num_items, false);
|
||||
if (ret)
|
||||
goto free_pending;
|
||||
|
||||
|
@ -979,7 +982,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
|
|||
if (snap_src)
|
||||
error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
|
||||
else
|
||||
error = create_subvol(mnt_userns, dir, dentry, name, namelen, inherit);
|
||||
error = create_subvol(mnt_userns, dir, dentry, inherit);
|
||||
|
||||
if (!error)
|
||||
fsnotify_mkdir(dir, dentry);
|
||||
|
@@ -1413,8 +1416,19 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 		if (!em)
 			break;
 
-		/* Skip hole/inline/preallocated extents */
-		if (em->block_start >= EXTENT_MAP_LAST_BYTE ||
+		/*
+		 * If the file extent is an inlined one, we may still want to
+		 * defrag it (fallthrough) if it will cause a regular extent.
+		 * This is for users who want to convert inline extents to
+		 * regular ones through max_inline= mount option.
+		 */
+		if (em->block_start == EXTENT_MAP_INLINE &&
+		    em->len <= inode->root->fs_info->max_inline)
+			goto next;
+
+		/* Skip hole/delalloc/preallocated extents */
+		if (em->block_start == EXTENT_MAP_HOLE ||
+		    em->block_start == EXTENT_MAP_DELALLOC ||
 		    test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
 			goto next;
 
@@ -1473,6 +1487,15 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 		if (em->len >= get_extent_max_capacity(em))
 			goto next;
 
+		/*
+		 * Normally there are no more extents after an inline one, thus
+		 * @next_mergeable will normally be false and not defragged.
+		 * So if an inline extent passed all above checks, just add it
+		 * for defrag, and be converted to regular extents.
+		 */
+		if (em->block_start == EXTENT_MAP_INLINE)
+			goto add;
+
 		next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
 						extent_thresh, newer_than, locked);
 		if (!next_mergeable) {
|
|||
static noinline int btrfs_ioctl_tree_search(struct inode *inode,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_search_args __user *uargs;
|
||||
struct btrfs_ioctl_search_args __user *uargs = argp;
|
||||
struct btrfs_ioctl_search_key sk;
|
||||
int ret;
|
||||
size_t buf_size;
|
||||
|
@ -2602,8 +2625,6 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
|
|||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
uargs = (struct btrfs_ioctl_search_args __user *)argp;
|
||||
|
||||
if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -2626,7 +2647,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
|
|||
static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_search_args_v2 __user *uarg;
|
||||
struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
|
||||
struct btrfs_ioctl_search_args_v2 args;
|
||||
int ret;
|
||||
size_t buf_size;
|
||||
|
@ -2636,7 +2657,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
|
|||
return -EPERM;
|
||||
|
||||
/* copy search header and buffer size */
|
||||
uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
|
||||
if (copy_from_user(&args, uarg, sizeof(args)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -4344,10 +4364,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
|
|||
bool need_unlock; /* for mut. excl. ops lock */
|
||||
int ret;
|
||||
|
||||
if (!arg)
|
||||
btrfs_warn(fs_info,
|
||||
"IOC_BALANCE ioctl (v1) is deprecated and will be removed in kernel 5.18");
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
|
@ -4355,6 +4371,13 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
bargs = memdup_user(arg, sizeof(*bargs));
|
||||
if (IS_ERR(bargs)) {
|
||||
ret = PTR_ERR(bargs);
|
||||
bargs = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
again:
|
||||
if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
|
@ -4402,59 +4425,42 @@ again:
|
|||
}
|
||||
|
||||
locked:
|
||||
|
||||
if (arg) {
|
||||
bargs = memdup_user(arg, sizeof(*bargs));
|
||||
if (IS_ERR(bargs)) {
|
||||
ret = PTR_ERR(bargs);
|
||||
if (bargs->flags & BTRFS_BALANCE_RESUME) {
|
||||
if (!fs_info->balance_ctl) {
|
||||
ret = -ENOTCONN;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (bargs->flags & BTRFS_BALANCE_RESUME) {
|
||||
if (!fs_info->balance_ctl) {
|
||||
ret = -ENOTCONN;
|
||||
goto out_bargs;
|
||||
}
|
||||
bctl = fs_info->balance_ctl;
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->flags |= BTRFS_BALANCE_RESUME;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE);
|
||||
|
||||
bctl = fs_info->balance_ctl;
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->flags |= BTRFS_BALANCE_RESUME;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE);
|
||||
goto do_balance;
|
||||
}
|
||||
|
||||
goto do_balance;
|
||||
}
|
||||
} else {
|
||||
bargs = NULL;
|
||||
if (bargs->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (fs_info->balance_ctl) {
|
||||
ret = -EINPROGRESS;
|
||||
goto out_bargs;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
|
||||
if (!bctl) {
|
||||
ret = -ENOMEM;
|
||||
goto out_bargs;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (arg) {
|
||||
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
|
||||
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
|
||||
memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
|
||||
|
||||
bctl->flags = bargs->flags;
|
||||
} else {
|
||||
/* balance everything - no filters */
|
||||
bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
|
||||
}
|
||||
|
||||
if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
|
||||
ret = -EINVAL;
|
||||
goto out_bctl;
|
||||
}
|
||||
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
|
||||
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
|
||||
memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
|
||||
|
||||
bctl->flags = bargs->flags;
|
||||
do_balance:
|
||||
/*
|
||||
* Ownership of bctl and exclusive operation goes to btrfs_balance.
|
||||
|
@ -4467,21 +4473,19 @@ do_balance:
|
|||
ret = btrfs_balance(fs_info, bctl, bargs);
|
||||
bctl = NULL;
|
||||
|
||||
if ((ret == 0 || ret == -ECANCELED) && arg) {
|
||||
if (ret == 0 || ret == -ECANCELED) {
|
||||
if (copy_to_user(arg, bargs, sizeof(*bargs)))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
out_bctl:
|
||||
kfree(bctl);
|
||||
out_bargs:
|
||||
kfree(bargs);
|
||||
out_unlock:
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
if (need_unlock)
|
||||
btrfs_exclop_finish(fs_info);
|
||||
out:
|
||||
mnt_drop_write_file(file);
|
||||
kfree(bargs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
@@ -380,9 +380,8 @@ static struct prop_handler prop_handlers[] = {
 	},
 };
 
-static int inherit_props(struct btrfs_trans_handle *trans,
-			 struct inode *inode,
-			 struct inode *parent)
+int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
+			      struct inode *inode, struct inode *parent)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -457,41 +456,6 @@ static int inherit_props(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
-			      struct inode *inode,
-			      struct inode *dir)
-{
-	if (!dir)
-		return 0;
-
-	return inherit_props(trans, inode, dir);
-}
-
-int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct btrfs_root *parent_root)
-{
-	struct super_block *sb = root->fs_info->sb;
-	struct inode *parent_inode, *child_inode;
-	int ret;
-
-	parent_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, parent_root);
-	if (IS_ERR(parent_inode))
-		return PTR_ERR(parent_inode);
-
-	child_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, root);
-	if (IS_ERR(child_inode)) {
-		iput(parent_inode);
-		return PTR_ERR(child_inode);
-	}
-
-	ret = inherit_props(trans, child_inode, parent_inode);
-	iput(child_inode);
-	iput(parent_inode);
-
-	return ret;
-}
-
 void __init btrfs_props_init(void)
 {
 	int i;

diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
@@ -23,8 +23,4 @@ int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
 			      struct inode *inode,
 			      struct inode *dir);
 
-int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct btrfs_root *parent_root);
-
 #endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
@@ -2290,7 +2290,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (!extent_buffer_uptodate(root_eb)) {
-		ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
+		ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL);
 		if (ret)
 			goto out;
 	}
@@ -3939,12 +3939,13 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 }
 
 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-				enum btrfs_qgroup_rsv_type type, bool enforce)
+				enum btrfs_qgroup_rsv_type type, bool enforce,
+				bool noflush)
 {
 	int ret;
 
 	ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
-	if (ret <= 0 && ret != -EDQUOT)
+	if ((ret <= 0 && ret != -EDQUOT) || noflush)
 		return ret;
 
 	ret = try_flush_qgroup(root);

diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
@@ -364,19 +364,23 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode,
 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 			      enum btrfs_qgroup_rsv_type type, bool enforce);
 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-				enum btrfs_qgroup_rsv_type type, bool enforce);
+				enum btrfs_qgroup_rsv_type type, bool enforce,
+				bool noflush);
 /* Reserve metadata space for pertrans and prealloc type */
 static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
 						     int num_bytes, bool enforce)
 {
 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
-					   BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
+					   BTRFS_QGROUP_RSV_META_PERTRANS,
+					   enforce, false);
 }
 static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
-						     int num_bytes, bool enforce)
+						     int num_bytes, bool enforce,
+						     bool noflush)
 {
 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
-					   BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
+					   BTRFS_QGROUP_RSV_META_PREALLOC,
+					   enforce, noflush);
 }
 
 void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
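Rough caller-side sketch of the new noflush flag, not code from this series: a context that must not block (the NOWAIT direct IO write path) asks for a prealloc qgroup reservation with noflush set, so __btrfs_qgroup_reserve_meta() returns -EDQUOT to the caller instead of calling try_flush_qgroup() and retrying. The wrapper name below is hypothetical.

/* Illustrative only: reserve qgroup metadata without the blocking flush path. */
static int reserve_meta_nowait(struct btrfs_root *root, int num_bytes)
{
	/* enforce = true: respect limits; noflush = true: fail fast, never flush */
	return btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true, true);
}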
[file diff suppressed because it is too large]
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
@@ -31,15 +31,14 @@
 struct btrfs_raid_bio;
 struct btrfs_device;
 
 int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
-			  u64 stripe_len, int mirror_num, int generic_io);
-int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
-			u64 stripe_len);
+			  u32 stripe_len, int mirror_num, int generic_io);
+int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len);
 
 void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
-			    u64 logical);
+			    unsigned int pgoff, u64 logical);
 
 struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
-				struct btrfs_io_context *bioc, u64 stripe_len,
+				struct btrfs_io_context *bioc, u32 stripe_len,
 				struct btrfs_device *scrub_dev,
 				unsigned long *dbitmap, int stripe_nsectors);
 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
@@ -614,14 +614,23 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
 static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
 				     struct inode *inode2, u64 loff2, u64 len)
 {
+	u64 range1_end = loff1 + len - 1;
+	u64 range2_end = loff2 + len - 1;
+
 	if (inode1 < inode2) {
 		swap(inode1, inode2);
 		swap(loff1, loff2);
+		swap(range1_end, range2_end);
 	} else if (inode1 == inode2 && loff2 < loff1) {
 		swap(loff1, loff2);
+		swap(range1_end, range2_end);
 	}
-	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
-	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+
+	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end);
+	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end);
+
+	btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
+	btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
 }
 
 static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2)
@@ -771,7 +780,6 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
 	u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
-	bool same_inode = inode_out == inode_in;
 	u64 wb_len;
 	int ret;
 
@@ -809,15 +817,6 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	else
 		wb_len = ALIGN(*len, bs);
 
-	/*
-	 * Since we don't lock ranges, wait for ongoing lockless dio writes (as
-	 * any in progress could create its ordered extents after we wait for
-	 * existing ordered extents below).
-	 */
-	inode_dio_wait(inode_in);
-	if (!same_inode)
-		inode_dio_wait(inode_out);
-
 	/*
 	 * Workaround to make sure NOCOW buffered write reach disk as NOCOW.
 	 *
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
@@ -362,7 +362,7 @@ struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
 	if (rb_node) {
 		node = rb_entry(rb_node, struct mapping_node, rb_node);
-		root = (struct btrfs_root *)node->data;
+		root = node->data;
 	}
 	spin_unlock(&rc->reloc_root_tree.lock);
 	return btrfs_grab_root(root);
@@ -2997,7 +2997,8 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 
 		/* Reserve metadata for this range */
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-						      clamped_len, clamped_len);
+						      clamped_len, clamped_len,
+						      false);
 		if (ret)
 			goto release_page;
 
@@ -3845,8 +3846,7 @@ out:
 	btrfs_end_transaction(trans);
 	btrfs_btree_balance_dirty(fs_info);
 	if (err) {
-		if (inode)
-			iput(inode);
+		iput(inode);
 		inode = ERR_PTR(err);
 	}
 	return inode;
@@ -3977,6 +3977,17 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 	if (!bg)
 		return -ENOENT;
 
+	/*
+	 * Relocation of a data block group creates ordered extents. Without
+	 * sb_start_write(), we can freeze the filesystem while unfinished
+	 * ordered extents are left. Such ordered extents can cause a deadlock
+	 * e.g. when syncfs() is waiting for their completion but they can't
+	 * finish because they block when joining a transaction, due to the
+	 * fact that the freeze locks are being held in write mode.
+	 */
+	if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
+		ASSERT(sb_write_started(fs_info->sb));
+
 	if (btrfs_pinned_by_swapfile(fs_info, bg)) {
 		btrfs_put_block_group(bg);
 		return -ETXTBSY;
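The assertion added above only documents an existing expectation; a minimal sketch of the pairing it checks follows. The caller shown is hypothetical, while sb_start_write()/sb_end_write() are the standard VFS freeze-protection helpers.

	/* Illustrative only: hold freeze protection around data block group relocation. */
	sb_start_write(fs_info->sb);
	ret = btrfs_relocate_block_group(fs_info, group_start);
	sb_end_write(fs_info->sb);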
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
@@ -509,7 +509,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 		/* One for parent inode, two for dir entries */
 		qgroup_num_bytes = 3 * fs_info->nodesize;
 		ret = btrfs_qgroup_reserve_meta_prealloc(root,
-							 qgroup_num_bytes, true);
+							 qgroup_num_bytes, true,
+							 false);
 		if (ret)
 			return ret;
 	}
 fs/btrfs/scrub.c | 1899 lines changed (diff suppressed because it is too large)
 fs/btrfs/send.c  |  400 lines changed
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
@@ -10,7 +10,6 @@
 #include <linux/mount.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
-#include <linux/radix-tree.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 #include <linux/compat.h>
@@ -128,11 +127,18 @@ struct send_ctx {
 	struct list_head new_refs;
 	struct list_head deleted_refs;
 
-	struct radix_tree_root name_cache;
+	struct xarray name_cache;
 	struct list_head name_cache_list;
 	int name_cache_size;
 
+	/*
+	 * The inode we are currently processing. It's not NULL only when we
+	 * need to issue write commands for data extents from this inode.
+	 */
+	struct inode *cur_inode;
 	struct file_ra_state ra;
+	u64 page_cache_clear_start;
+	bool clean_page_cache;
 
 	/*
 	 * We process inodes by their increasing order, so if before an
@@ -262,14 +268,13 @@ struct orphan_dir_info {
 struct name_cache_entry {
 	struct list_head list;
 	/*
-	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
-	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
-	 * more then one inum would fall into the same entry, we use radix_list
-	 * to store the additional entries. radix_list is also used to store
-	 * entries where two entries have the same inum but different
-	 * generations.
+	 * On 32bit kernels, xarray has only 32bit indices, but we need to
+	 * handle 64bit inums. We use the lower 32bit of the 64bit inum to store
+	 * it in the tree. If more than one inum would fall into the same entry,
+	 * we use inum_aliases to store the additional entries. inum_aliases is
+	 * also used to store entries with the same inum but different generations.
 	 */
-	struct list_head radix_list;
+	struct list_head inum_aliases;
 	u64 ino;
 	u64 gen;
 	u64 parent_ino;
@ -2019,9 +2024,9 @@ out:
|
|||
}
|
||||
|
||||
/*
|
||||
* Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
|
||||
* Insert a name cache entry. On 32bit kernels the xarray index is 32bit,
|
||||
* so we need to do some special handling in case we have clashes. This function
|
||||
* takes care of this with the help of name_cache_entry::radix_list.
|
||||
* takes care of this with the help of name_cache_entry::inum_aliases.
|
||||
* In case of error, nce is kfreed.
|
||||
*/
|
||||
static int name_cache_insert(struct send_ctx *sctx,
|
||||
|
@ -2030,8 +2035,7 @@ static int name_cache_insert(struct send_ctx *sctx,
|
|||
int ret = 0;
|
||||
struct list_head *nce_head;
|
||||
|
||||
nce_head = radix_tree_lookup(&sctx->name_cache,
|
||||
(unsigned long)nce->ino);
|
||||
nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
|
||||
if (!nce_head) {
|
||||
nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
|
||||
if (!nce_head) {
|
||||
|
@ -2040,14 +2044,14 @@ static int name_cache_insert(struct send_ctx *sctx,
|
|||
}
|
||||
INIT_LIST_HEAD(nce_head);
|
||||
|
||||
ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
|
||||
ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
kfree(nce_head);
|
||||
kfree(nce);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
list_add_tail(&nce->radix_list, nce_head);
|
||||
list_add_tail(&nce->inum_aliases, nce_head);
|
||||
list_add_tail(&nce->list, &sctx->name_cache_list);
|
||||
sctx->name_cache_size++;
|
||||
|
||||
|
@ -2059,15 +2063,14 @@ static void name_cache_delete(struct send_ctx *sctx,
|
|||
{
|
||||
struct list_head *nce_head;
|
||||
|
||||
nce_head = radix_tree_lookup(&sctx->name_cache,
|
||||
(unsigned long)nce->ino);
|
||||
nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
|
||||
if (!nce_head) {
|
||||
btrfs_err(sctx->send_root->fs_info,
|
||||
"name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
|
||||
nce->ino, sctx->name_cache_size);
|
||||
}
|
||||
|
||||
list_del(&nce->radix_list);
|
||||
list_del(&nce->inum_aliases);
|
||||
list_del(&nce->list);
|
||||
sctx->name_cache_size--;
|
||||
|
||||
|
@ -2075,7 +2078,7 @@ static void name_cache_delete(struct send_ctx *sctx,
|
|||
* We may not get to the final release of nce_head if the lookup fails
|
||||
*/
|
||||
if (nce_head && list_empty(nce_head)) {
|
||||
radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
|
||||
xa_erase(&sctx->name_cache, (unsigned long)nce->ino);
|
||||
kfree(nce_head);
|
||||
}
|
||||
}
|
||||
|
@ -2086,11 +2089,11 @@ static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
|
|||
struct list_head *nce_head;
|
||||
struct name_cache_entry *cur;
|
||||
|
||||
nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
|
||||
nce_head = xa_load(&sctx->name_cache, (unsigned long)ino);
|
||||
if (!nce_head)
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(cur, nce_head, radix_list) {
|
||||
list_for_each_entry(cur, nce_head, inum_aliases) {
|
||||
if (cur->ino == ino && cur->gen == gen)
|
||||
return cur;
|
||||
}
|
||||
|
@ -2675,61 +2678,43 @@ out:
|
|||
static int did_create_dir(struct send_ctx *sctx, u64 dir)
|
||||
{
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_path *path = NULL;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_key di_key;
|
||||
struct extent_buffer *eb;
|
||||
struct btrfs_dir_item *di;
|
||||
int slot;
|
||||
|
||||
path = alloc_path_for_send();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
key.objectid = dir;
|
||||
key.type = BTRFS_DIR_INDEX_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (1) {
|
||||
eb = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(eb)) {
|
||||
ret = btrfs_next_leaf(sctx->send_root, path);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
|
||||
struct extent_buffer *eb = path->nodes[0];
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &found_key, slot);
|
||||
if (found_key.objectid != key.objectid ||
|
||||
found_key.type != key.type) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
|
||||
di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
|
||||
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
|
||||
|
||||
if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
|
||||
di_key.objectid < sctx->send_progress) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
@ -2933,6 +2918,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
|
|||
u64 send_progress)
|
||||
{
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_root *root = sctx->parent_root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
|
@ -2959,23 +2945,9 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
|
|||
if (odi)
|
||||
key.offset = odi->last_dir_index_offset;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (1) {
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
struct waiting_dir_move *dm;
|
||||
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
|
||||
path->slots[0]);
|
||||
if (found_key.objectid != key.objectid ||
|
||||
found_key.type != key.type)
|
||||
break;
|
||||
|
@ -3010,8 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
|
|||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
if (iter_ret < 0) {
|
||||
ret = iter_ret;
|
||||
goto out;
|
||||
}
|
||||
free_orphan_dir_info(sctx, odi);
|
||||
|
||||
|
@ -3579,7 +3553,7 @@ static int check_ino_in_path(struct btrfs_root *root,
|
|||
}
|
||||
|
||||
/*
|
||||
* Check if ino ino1 is an ancestor of inode ino2 in the given root for any
|
||||
* Check if inode ino1 is an ancestor of inode ino2 in the given root for any
|
||||
* possible path (in case ino2 is not a directory and has multiple hard links).
|
||||
* Return 1 if true, 0 if false and < 0 on error.
|
||||
*/
|
||||
|
@ -3591,6 +3565,7 @@ static int is_ancestor(struct btrfs_root *root,
|
|||
{
|
||||
bool free_fs_path = false;
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_path *path = NULL;
|
||||
struct btrfs_key key;
|
||||
|
||||
|
@ -3611,26 +3586,12 @@ static int is_ancestor(struct btrfs_root *root,
|
|||
key.type = BTRFS_INODE_REF_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (true) {
|
||||
btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
u32 cur_offset = 0;
|
||||
u32 item_size;
|
||||
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
if (key.objectid != ino2)
|
||||
break;
|
||||
if (key.type != BTRFS_INODE_REF_KEY &&
|
||||
|
@ -3668,10 +3629,12 @@ static int is_ancestor(struct btrfs_root *root,
|
|||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
if (free_fs_path)
|
||||
fs_path_free(fs_path);
|
||||
|
@ -4551,13 +4514,12 @@ out:
|
|||
static int process_all_refs(struct send_ctx *sctx,
|
||||
enum btrfs_compare_tree_result cmd)
|
||||
{
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *eb;
|
||||
int slot;
|
||||
iterate_inode_ref_t cb;
|
||||
int pending_move = 0;
|
||||
|
||||
|
@ -4581,24 +4543,7 @@ static int process_all_refs(struct send_ctx *sctx,
|
|||
key.objectid = sctx->cmp_key->objectid;
|
||||
key.type = BTRFS_INODE_REF_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (1) {
|
||||
eb = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(eb)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &found_key, slot);
|
||||
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
if (found_key.objectid != key.objectid ||
|
||||
(found_key.type != BTRFS_INODE_REF_KEY &&
|
||||
found_key.type != BTRFS_INODE_EXTREF_KEY))
|
||||
|
@ -4607,8 +4552,11 @@ static int process_all_refs(struct send_ctx *sctx,
|
|||
ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0) {
|
||||
ret = iter_ret;
|
||||
goto out;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
|
@ -4870,13 +4818,12 @@ out:
|
|||
|
||||
static int process_all_new_xattrs(struct send_ctx *sctx)
|
||||
{
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *eb;
|
||||
int slot;
|
||||
|
||||
path = alloc_path_for_send();
|
||||
if (!path)
|
||||
|
@ -4887,39 +4834,21 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
|
|||
key.objectid = sctx->cmp_key->objectid;
|
||||
key.type = BTRFS_XATTR_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (1) {
|
||||
eb = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(eb)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &found_key, slot);
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
if (found_key.objectid != key.objectid ||
|
||||
found_key.type != key.type) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
path->slots[0]++;
|
||||
break;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
@ -4946,7 +4875,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
|
|||
{
|
||||
struct btrfs_root *root = sctx->send_root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct inode *inode;
|
||||
struct page *page;
|
||||
pgoff_t index = offset >> PAGE_SHIFT;
|
||||
pgoff_t last_index;
|
||||
|
@ -4957,37 +4885,30 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
last_index = (offset + len - 1) >> PAGE_SHIFT;
|
||||
|
||||
/* initial readahead */
|
||||
memset(&sctx->ra, 0, sizeof(struct file_ra_state));
|
||||
file_ra_state_init(&sctx->ra, inode->i_mapping);
|
||||
|
||||
while (index <= last_index) {
|
||||
unsigned cur_len = min_t(unsigned, len,
|
||||
PAGE_SIZE - pg_offset);
|
||||
|
||||
page = find_lock_page(inode->i_mapping, index);
|
||||
page = find_lock_page(sctx->cur_inode->i_mapping, index);
|
||||
if (!page) {
|
||||
page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
|
||||
NULL, index, last_index + 1 - index);
|
||||
page_cache_sync_readahead(sctx->cur_inode->i_mapping,
|
||||
&sctx->ra, NULL, index,
|
||||
last_index + 1 - index);
|
||||
|
||||
page = find_or_create_page(inode->i_mapping, index,
|
||||
GFP_KERNEL);
|
||||
page = find_or_create_page(sctx->cur_inode->i_mapping,
|
||||
index, GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (PageReadahead(page)) {
|
||||
page_cache_async_readahead(inode->i_mapping, &sctx->ra,
|
||||
NULL, page, index, last_index + 1 - index);
|
||||
}
|
||||
if (PageReadahead(page))
|
||||
page_cache_async_readahead(sctx->cur_inode->i_mapping,
|
||||
&sctx->ra, NULL, page, index,
|
||||
last_index + 1 - index);
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
btrfs_readpage(NULL, page);
|
||||
|
@ -5013,7 +4934,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
|
|||
len -= cur_len;
|
||||
sctx->send_size += cur_len;
|
||||
}
|
||||
iput(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -5220,12 +5141,49 @@ static int send_extent_data(struct send_ctx *sctx,
|
|||
const u64 offset,
|
||||
const u64 len)
|
||||
{
|
||||
const u64 end = offset + len;
|
||||
u64 read_size = max_send_read_size(sctx);
|
||||
u64 sent = 0;
|
||||
|
||||
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
|
||||
return send_update_extent(sctx, offset, len);
|
||||
|
||||
if (sctx->cur_inode == NULL) {
|
||||
struct btrfs_root *root = sctx->send_root;
|
||||
|
||||
sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
|
||||
if (IS_ERR(sctx->cur_inode)) {
|
||||
int err = PTR_ERR(sctx->cur_inode);
|
||||
|
||||
sctx->cur_inode = NULL;
|
||||
return err;
|
||||
}
|
||||
memset(&sctx->ra, 0, sizeof(struct file_ra_state));
|
||||
file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
|
||||
|
||||
/*
|
||||
* It's very likely there are no pages from this inode in the page
|
||||
* cache, so after reading extents and sending their data, we clean
|
||||
* the page cache to avoid trashing the page cache (adding pressure
|
||||
* to the page cache and forcing eviction of other data more useful
|
||||
* for applications).
|
||||
*
|
||||
* We decide if we should clean the page cache simply by checking
|
||||
* if the inode's mapping nrpages is 0 when we first open it, and
|
||||
* not by using something like filemap_range_has_page() before
|
||||
* reading an extent because when we ask the readahead code to
|
||||
* read a given file range, it may (and almost always does) read
|
||||
* pages from beyond that range (see the documentation for
|
||||
* page_cache_sync_readahead()), so it would not be reliable,
|
||||
* because after reading the first extent future calls to
|
||||
* filemap_range_has_page() would return true because the readahead
|
||||
* on the previous extent resulted in reading pages of the current
|
||||
* extent as well.
|
||||
*/
|
||||
sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
|
||||
sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
|
||||
}
|
||||
|
||||
while (sent < len) {
|
||||
u64 size = min(len - sent, read_size);
|
||||
int ret;
|
||||
|
@ -5235,6 +5193,37 @@ static int send_extent_data(struct send_ctx *sctx,
|
|||
return ret;
|
||||
sent += size;
|
||||
}
|
||||
|
||||
if (sctx->clean_page_cache && IS_ALIGNED(end, PAGE_SIZE)) {
|
||||
/*
|
||||
* Always operate only on ranges that are a multiple of the page
|
||||
* size. This is not only to prevent zeroing parts of a page in
|
||||
* the case of subpage sector size, but also to guarantee we evict
|
||||
* pages, as passing a range that is smaller than page size does
|
||||
* not evict the respective page (only zeroes part of its content).
|
||||
*
|
||||
* Always start from the end offset of the last range cleared.
|
||||
* This is because the readahead code may (and very often does)
|
||||
* reads pages beyond the range we request for readahead. So if
|
||||
* we have an extent layout like this:
|
||||
*
|
||||
* [ extent A ] [ extent B ] [ extent C ]
|
||||
*
|
||||
* When we ask page_cache_sync_readahead() to read extent A, it
|
||||
* may also trigger reads for pages of extent B. If we are doing
|
||||
* an incremental send and extent B has not changed between the
|
||||
* parent and send snapshots, some or all of its pages may end
|
||||
* up being read and placed in the page cache. So when truncating
|
||||
* the page cache we always start from the end offset of the
|
||||
* previously processed extent up to the end of the current
|
||||
* extent.
|
||||
*/
|
||||
truncate_inode_pages_range(&sctx->cur_inode->i_data,
|
||||
sctx->page_cache_clear_start,
|
||||
end - 1);
|
||||
sctx->page_cache_clear_start = end;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
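A worked example of the page cache clearing logic above, with hypothetical offsets and PAGE_SIZE assumed to be 4096 (values chosen only for illustration):

	/*
	 * send_extent_data(sctx, 5000, 19576):
	 *   end = 5000 + 19576 = 24576
	 *   page_cache_clear_start = round_down(5000, 4096) = 4096
	 *   IS_ALIGNED(24576, 4096) is true, so pages in [4096, 24575] are
	 *   dropped and page_cache_clear_start becomes 24576.
	 * send_extent_data(sctx, 24576, 1000):
	 *   end = 25576 is not page aligned, so nothing is dropped yet; the
	 *   leftover range is picked up by a later aligned extent or by
	 *   close_current_inode() once the whole inode has been processed.
	 */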
|
||||
|
||||
|
@ -5965,13 +5954,12 @@ out:
|
|||
|
||||
static int process_all_extents(struct send_ctx *sctx)
|
||||
{
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int iter_ret = 0;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *eb;
|
||||
int slot;
|
||||
|
||||
root = sctx->send_root;
|
||||
path = alloc_path_for_send();
|
||||
|
@ -5981,41 +5969,21 @@ static int process_all_extents(struct send_ctx *sctx)
|
|||
key.objectid = sctx->cmp_key->objectid;
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (1) {
|
||||
eb = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
|
||||
if (slot >= btrfs_header_nritems(eb)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
} else if (ret > 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &found_key, slot);
|
||||
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
if (found_key.objectid != key.objectid ||
|
||||
found_key.type != key.type) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = process_extent(sctx, path, &found_key);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
path->slots[0]++;
|
||||
break;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
@ -6205,8 +6173,11 @@ static int btrfs_unlink_all_paths(struct send_ctx *sctx)
|
|||
{
|
||||
LIST_HEAD(deleted_refs);
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_root *root = sctx->parent_root;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key found_key;
|
||||
struct parent_paths_ctx ctx;
|
||||
int iter_ret = 0;
|
||||
int ret;
|
||||
|
||||
path = alloc_path_for_send();
|
||||
|
@ -6216,39 +6187,26 @@ static int btrfs_unlink_all_paths(struct send_ctx *sctx)
|
|||
key.objectid = sctx->cur_ino;
|
||||
key.type = BTRFS_INODE_REF_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ctx.refs = &deleted_refs;
|
||||
ctx.sctx = sctx;
|
||||
|
||||
while (true) {
|
||||
struct extent_buffer *eb = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
|
||||
if (slot >= btrfs_header_nritems(eb)) {
|
||||
ret = btrfs_next_leaf(sctx->parent_root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &key, slot);
|
||||
if (key.objectid != sctx->cur_ino)
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
if (found_key.objectid != key.objectid)
|
||||
break;
|
||||
if (key.type != BTRFS_INODE_REF_KEY &&
|
||||
key.type != BTRFS_INODE_EXTREF_KEY)
|
||||
if (found_key.type != key.type &&
|
||||
found_key.type != BTRFS_INODE_EXTREF_KEY)
|
||||
break;
|
||||
|
||||
ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
|
||||
ret = iterate_inode_ref(root, path, &found_key, 1,
|
||||
record_parent_ref, &ctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0) {
|
||||
ret = iter_ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (!list_empty(&deleted_refs)) {
|
||||
|
@ -6270,6 +6228,30 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void close_current_inode(struct send_ctx *sctx)
|
||||
{
|
||||
u64 i_size;
|
||||
|
||||
if (sctx->cur_inode == NULL)
|
||||
return;
|
||||
|
||||
i_size = i_size_read(sctx->cur_inode);
|
||||
|
||||
/*
|
||||
* If we are doing an incremental send, we may have extents between the
|
||||
* last processed extent and the i_size that have not been processed
|
||||
* because they haven't changed but we may have read some of their pages
|
||||
* through readahead, see the comments at send_extent_data().
|
||||
*/
|
||||
if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
|
||||
truncate_inode_pages_range(&sctx->cur_inode->i_data,
|
||||
sctx->page_cache_clear_start,
|
||||
round_up(i_size, PAGE_SIZE) - 1);
|
||||
|
||||
iput(sctx->cur_inode);
|
||||
sctx->cur_inode = NULL;
|
||||
}
|
||||
|
||||
static int changed_inode(struct send_ctx *sctx,
|
||||
enum btrfs_compare_tree_result result)
|
||||
{
|
||||
|
@ -6280,6 +6262,8 @@ static int changed_inode(struct send_ctx *sctx,
|
|||
u64 left_gen = 0;
|
||||
u64 right_gen = 0;
|
||||
|
||||
close_current_inode(sctx);
|
||||
|
||||
sctx->cur_ino = key->objectid;
|
||||
sctx->cur_inode_new_gen = 0;
|
||||
sctx->cur_inode_last_extent = (u64)-1;
|
||||
|
@@ -7534,7 +7518,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)

 	INIT_LIST_HEAD(&sctx->new_refs);
 	INIT_LIST_HEAD(&sctx->deleted_refs);
-	INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
+	xa_init_flags(&sctx->name_cache, GFP_KERNEL);
 	INIT_LIST_HEAD(&sctx->name_cache_list);

 	sctx->flags = arg->flags;
|
||||
|
@ -7766,6 +7750,8 @@ out:
|
|||
|
||||
name_cache_free(sctx);
|
||||
|
||||
close_current_inode(sctx);
|
||||
|
||||
kfree(sctx);
|
||||
}
|
||||
|
||||
|
|
|
@ -181,6 +181,12 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
|
|||
found->full = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Block groups with more than this value (percents) of unusable space will be
|
||||
* scheduled for background reclaim.
|
||||
*/
|
||||
#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75)
|
||||
|
||||
static int create_space_info(struct btrfs_fs_info *info, u64 flags)
|
||||
{
|
||||
|
||||
|
@ -203,6 +209,9 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
|
|||
INIT_LIST_HEAD(&space_info->priority_tickets);
|
||||
space_info->clamp = 1;
|
||||
|
||||
if (btrfs_is_zoned(info))
|
||||
space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
|
||||
|
||||
ret = btrfs_sysfs_add_space_info_type(info, space_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@@ -519,7 +528,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 		items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
 	}

-	trans = (struct btrfs_trans_handle *)current->journal_info;
+	trans = current->journal_info;

 	/*
 	 * If we are doing more ordered than delalloc we need to just wait on
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#ifndef BTRFS_SPACE_INFO_H
|
||||
#define BTRFS_SPACE_INFO_H
|
||||
|
||||
#include "volumes.h"
|
||||
|
||||
struct btrfs_space_info {
|
||||
spinlock_t lock;
|
||||
|
||||
|
@ -24,6 +26,12 @@ struct btrfs_space_info {
|
|||
the space info if we had an ENOSPC in the
|
||||
allocator. */
|
||||
|
||||
/*
|
||||
* Once a block group drops below this threshold (percents) we'll
|
||||
* schedule it for reclaim.
|
||||
*/
|
||||
int bg_reclaim_threshold;
|
||||
|
||||
int clamp; /* Used to scale our threshold for preemptive
|
||||
flushing. The value is >> clamp, so turns
|
||||
out to be a 2^clamp divisor. */
|
||||
|
|
|
@@ -63,6 +63,29 @@
  * This means a slightly higher tree locking latency.
  */

+bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page)
+{
+	if (fs_info->sectorsize >= PAGE_SIZE)
+		return false;
+
+	/*
+	 * Only data pages (either through DIO or compression) can have no
+	 * mapping. And if page->mapping->host is data inode, it's subpage.
+	 * As we have ruled our sectorsize >= PAGE_SIZE case already.
+	 */
+	if (!page->mapping || !page->mapping->host ||
+	    is_data_inode(page->mapping->host))
+		return true;
+
+	/*
+	 * Now the only remaining case is metadata, which we only go subpage
+	 * routine if nodesize < PAGE_SIZE.
+	 */
+	if (fs_info->nodesize < PAGE_SIZE)
+		return true;
+	return false;
+}
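For orientation, a sketch of what btrfs_is_subpage() returns for a few example configurations; the sector/node/page size combinations are illustrative, not taken from the patch:

	/*
	 * sectorsize  nodesize  PAGE_SIZE   data page   metadata page
	 *     4K         16K        4K        false         false
	 *     4K         16K       64K        true          true
	 *     4K         64K       64K        true          false
	 */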
|
||||
|
||||
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
|
||||
{
|
||||
unsigned int cur = 0;
|
||||
|
@ -107,7 +130,7 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
|||
ASSERT(PageLocked(page));
|
||||
|
||||
/* Either not subpage, or the page already has private attached */
|
||||
if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page))
|
||||
if (!btrfs_is_subpage(fs_info, page) || PagePrivate(page))
|
||||
return 0;
|
||||
|
||||
subpage = btrfs_alloc_subpage(fs_info, type);
|
||||
|
@ -124,10 +147,10 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_subpage *subpage;
|
||||
|
||||
/* Either not subpage, or already detached */
|
||||
if (fs_info->sectorsize == PAGE_SIZE || !PagePrivate(page))
|
||||
if (!btrfs_is_subpage(fs_info, page) || !PagePrivate(page))
|
||||
return;
|
||||
|
||||
subpage = (struct btrfs_subpage *)detach_page_private(page);
|
||||
subpage = detach_page_private(page);
|
||||
ASSERT(subpage);
|
||||
btrfs_free_subpage(subpage);
|
||||
}
|
||||
|
@ -175,7 +198,7 @@ void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
if (!btrfs_is_subpage(fs_info, page))
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->mapping);
|
||||
|
@ -190,7 +213,7 @@ void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
|
|||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
if (!btrfs_is_subpage(fs_info, page))
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->mapping);
|
||||
|
@ -319,7 +342,7 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
|
|||
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {
|
||||
lock_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
@ -336,7 +359,7 @@ int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
|||
void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))
|
||||
return unlock_page(page);
|
||||
btrfs_subpage_clamp_range(page, &start, &len);
|
||||
if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len))
|
||||
|
@ -620,7 +643,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
|
|||
void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) { \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \
|
||||
set_page_func(page); \
|
||||
return; \
|
||||
} \
|
||||
|
@ -629,7 +652,7 @@ void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info, \
|
|||
void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) { \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \
|
||||
clear_page_func(page); \
|
||||
return; \
|
||||
} \
|
||||
|
@ -638,14 +661,14 @@ void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info, \
|
|||
bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) \
|
||||
return test_page_func(page); \
|
||||
return btrfs_subpage_test_##name(fs_info, page, start, len); \
|
||||
} \
|
||||
void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) { \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \
|
||||
set_page_func(page); \
|
||||
return; \
|
||||
} \
|
||||
|
@ -655,7 +678,7 @@ void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
|
|||
void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) { \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \
|
||||
clear_page_func(page); \
|
||||
return; \
|
||||
} \
|
||||
|
@ -665,7 +688,7 @@ void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
|
|||
bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) \
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) \
|
||||
return test_page_func(page); \
|
||||
btrfs_subpage_clamp_range(page, &start, &len); \
|
||||
return btrfs_subpage_test_##name(fs_info, page, start, len); \
|
||||
|
@ -694,7 +717,7 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
|||
return;
|
||||
|
||||
ASSERT(!PageDirty(page));
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
if (!btrfs_is_subpage(fs_info, page))
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
|
@ -722,8 +745,8 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
|
|||
struct btrfs_subpage *subpage;
|
||||
|
||||
ASSERT(PageLocked(page));
|
||||
/* For regular page size case, we just unlock the page */
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
/* For non-subpage case, we just unlock the page */
|
||||
if (!btrfs_is_subpage(fs_info, page))
|
||||
return unlock_page(page);
|
||||
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
|
|
|
@ -74,6 +74,8 @@ enum btrfs_subpage_type {
|
|||
BTRFS_SUBPAGE_DATA,
|
||||
};
|
||||
|
||||
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page);
|
||||
|
||||
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
|
||||
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, enum btrfs_subpage_type type);
|
||||
|
|
|
@@ -261,7 +261,7 @@ static struct ratelimit_state printk_limits[] = {
 	RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
 };

-void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
 	struct va_format vaf;
|
||||
|
@ -292,10 +292,10 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
|
|||
char statestr[STATE_STRING_BUF_LEN];
|
||||
|
||||
btrfs_state_to_string(fs_info, statestr);
|
||||
printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
|
||||
_printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
|
||||
fs_info->sb->s_id, statestr, &vaf);
|
||||
} else {
|
||||
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
|
||||
_printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1903,6 +1903,7 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
|
|||
old_pool_size, new_pool_size);
|
||||
|
||||
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
|
||||
|
@ -1912,8 +1913,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
|
|||
btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
|
||||
btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
|
||||
new_pool_size);
|
||||
}
|
||||
|
||||
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
|
||||
|
|
|
@ -394,11 +394,9 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
|
|||
{
|
||||
ssize_t ret = 0;
|
||||
|
||||
/* 4K sector size is also supported with 64K page size */
|
||||
if (PAGE_SIZE == SZ_64K)
|
||||
/* An artificial limit to only support 4K and PAGE_SIZE */
|
||||
if (PAGE_SIZE > SZ_4K)
|
||||
ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);
|
||||
|
||||
/* Only sectorsize == PAGE_SIZE is now supported */
|
||||
ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);
|
||||
|
||||
return ret;
|
||||
|
@ -722,6 +720,42 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
|
|||
SPACE_INFO_ATTR(disk_used);
|
||||
SPACE_INFO_ATTR(disk_total);
|
||||
|
||||
static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
int thresh;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &thresh);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (thresh < 0 || thresh > 100)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(space_info->bg_reclaim_threshold, thresh);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
BTRFS_ATTR_RW(space_info, bg_reclaim_threshold,
|
||||
btrfs_sinfo_bg_reclaim_threshold_show,
|
||||
btrfs_sinfo_bg_reclaim_threshold_store);
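A minimal usage sketch for the new per-space-info tunable; the path assumes the usual per-filesystem sysfs layout under /sys/fs/btrfs, with <UUID> standing in for the filesystem UUID and "data" for the space info of interest:

	/*
	 * Read the current threshold (percent of unusable space in a block group):
	 *   cat /sys/fs/btrfs/<UUID>/allocation/data/bg_reclaim_threshold
	 * Start reclaiming block groups once 80% of their space is unusable:
	 *   echo 80 > /sys/fs/btrfs/<UUID>/allocation/data/bg_reclaim_threshold
	 * Accepted values are 0-100; anything else is rejected with -EINVAL.
	 */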
|
||||
|
||||
/*
|
||||
* Allocation information about block group types.
|
||||
*
|
||||
|
@ -738,6 +772,7 @@ static struct attribute *space_info_attrs[] = {
|
|||
BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
|
||||
BTRFS_ATTR_PTR(space_info, disk_used),
|
||||
BTRFS_ATTR_PTR(space_info, disk_total),
|
||||
BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(space_info);
|
||||
|
|
|
@ -150,8 +150,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
|||
|
||||
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct radix_tree_iter iter;
|
||||
void **slot;
|
||||
unsigned long index;
|
||||
struct extent_buffer *eb;
|
||||
struct btrfs_device *dev, *tmp;
|
||||
|
||||
if (!fs_info)
|
||||
|
@ -163,25 +163,9 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
|
|||
|
||||
test_mnt->mnt_sb->s_fs_info = NULL;
|
||||
|
||||
spin_lock(&fs_info->buffer_lock);
|
||||
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
|
||||
if (!eb)
|
||||
continue;
|
||||
/* Shouldn't happen but that kind of thinking creates CVE's */
|
||||
if (radix_tree_exception(eb)) {
|
||||
if (radix_tree_deref_retry(eb))
|
||||
slot = radix_tree_iter_retry(&iter);
|
||||
continue;
|
||||
}
|
||||
slot = radix_tree_iter_resume(slot, &iter);
|
||||
spin_unlock(&fs_info->buffer_lock);
|
||||
xa_for_each(&fs_info->extent_buffers, index, eb) {
|
||||
free_extent_buffer_stale(eb);
|
||||
spin_lock(&fs_info->buffer_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->buffer_lock);
|
||||
|
||||
btrfs_mapping_tree_free(&fs_info->mapping_tree);
|
||||
list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
|
||||
|
@ -202,7 +186,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
|
|||
if (!root)
|
||||
return;
|
||||
/* Will be freed by btrfs_free_fs_roots */
|
||||
if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
|
||||
if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state)))
|
||||
return;
|
||||
btrfs_global_root_delete(root);
|
||||
btrfs_put_root(root);
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "space-info.h"
|
||||
#include "zoned.h"
|
||||
|
||||
#define BTRFS_ROOT_TRANS_TAG 0
|
||||
#define BTRFS_ROOT_TRANS_TAG XA_MARK_0
|
||||
|
||||
/*
|
||||
* Transaction states and transitions
|
||||
|
@ -221,7 +221,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
|
|||
* the caching thread will re-start it's search from 3, and thus find
|
||||
* the hole from [4,6) to add to the free space cache.
|
||||
*/
|
||||
spin_lock(&fs_info->block_group_cache_lock);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
list_for_each_entry_safe(caching_ctl, next,
|
||||
&fs_info->caching_block_groups, list) {
|
||||
struct btrfs_block_group *cache = caching_ctl->block_group;
|
||||
|
@ -234,7 +234,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
|
|||
cache->last_byte_to_unpin = caching_ctl->progress;
|
||||
}
|
||||
}
|
||||
spin_unlock(&fs_info->block_group_cache_lock);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
}
|
||||
|
||||
|
@ -437,15 +437,15 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
|||
*/
|
||||
smp_wmb();
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
if (root->last_trans == trans->transid && !force) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
return 0;
|
||||
}
|
||||
radix_tree_tag_set(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
xa_set_mark(&fs_info->fs_roots,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
root->last_trans = trans->transid;
|
||||
|
||||
/* this is pretty tricky. We don't want to
|
||||
|
@ -487,11 +487,9 @@ void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
|
|||
spin_unlock(&cur_trans->dropped_roots_lock);
|
||||
|
||||
/* Make sure we don't try to update the root at commit time */
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
radix_tree_tag_clear(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
xa_clear_mark(&fs_info->fs_roots,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
}
|
||||
|
||||
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
|
@ -1404,9 +1402,8 @@ void btrfs_add_dead_root(struct btrfs_root *root)
|
|||
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *gang[8];
|
||||
int i;
|
||||
int ret;
|
||||
struct btrfs_root *root;
|
||||
unsigned long index;
|
||||
|
||||
/*
|
||||
* At this point no one can be using this transaction to modify any tree
|
||||
|
@ -1414,57 +1411,46 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
|
|||
*/
|
||||
ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
while (1) {
|
||||
ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
|
||||
(void **)gang, 0,
|
||||
ARRAY_SIZE(gang),
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
if (ret == 0)
|
||||
break;
|
||||
for (i = 0; i < ret; i++) {
|
||||
struct btrfs_root *root = gang[i];
|
||||
int ret2;
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) {
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* At this point we can neither have tasks logging inodes
|
||||
* from a root nor trying to commit a log tree.
|
||||
*/
|
||||
ASSERT(atomic_read(&root->log_writers) == 0);
|
||||
ASSERT(atomic_read(&root->log_commit[0]) == 0);
|
||||
ASSERT(atomic_read(&root->log_commit[1]) == 0);
|
||||
/*
|
||||
* At this point we can neither have tasks logging inodes
|
||||
* from a root nor trying to commit a log tree.
|
||||
*/
|
||||
ASSERT(atomic_read(&root->log_writers) == 0);
|
||||
ASSERT(atomic_read(&root->log_commit[0]) == 0);
|
||||
ASSERT(atomic_read(&root->log_commit[1]) == 0);
|
||||
|
||||
radix_tree_tag_clear(&fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
xa_clear_mark(&fs_info->fs_roots,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
|
||||
btrfs_free_log(trans, root);
|
||||
ret2 = btrfs_update_reloc_root(trans, root);
|
||||
if (ret2)
|
||||
return ret2;
|
||||
btrfs_free_log(trans, root);
|
||||
ret = btrfs_update_reloc_root(trans, root);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* see comments in should_cow_block() */
|
||||
clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
|
||||
smp_mb__after_atomic();
|
||||
/* See comments in should_cow_block() */
|
||||
clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
|
||||
smp_mb__after_atomic();
|
||||
|
||||
if (root->commit_root != root->node) {
|
||||
list_add_tail(&root->dirty_list,
|
||||
&trans->transaction->switch_commits);
|
||||
btrfs_set_root_node(&root->root_item,
|
||||
root->node);
|
||||
}
|
||||
|
||||
ret2 = btrfs_update_root(trans, fs_info->tree_root,
|
||||
&root->root_key,
|
||||
&root->root_item);
|
||||
if (ret2)
|
||||
return ret2;
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
btrfs_qgroup_free_meta_all_pertrans(root);
|
||||
if (root->commit_root != root->node) {
|
||||
list_add_tail(&root->dirty_list,
|
||||
&trans->transaction->switch_commits);
|
||||
btrfs_set_root_node(&root->root_item, root->node);
|
||||
}
|
||||
|
||||
ret = btrfs_update_root(trans, fs_info->tree_root,
|
||||
&root->root_key, &root->root_item);
|
||||
if (ret)
|
||||
return ret;
|
||||
spin_lock(&fs_info->fs_roots_lock);
|
||||
btrfs_qgroup_free_meta_all_pertrans(root);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
spin_unlock(&fs_info->fs_roots_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1855,3 +1855,58 @@ out:
|
|||
return ret;
|
||||
}
|
||||
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
|
||||
|
||||
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
|
||||
{
|
||||
const bool is_subvol = is_fstree(root_owner);
|
||||
const u64 eb_owner = btrfs_header_owner(eb);
|
||||
|
||||
/*
|
||||
* Skip dummy fs, as selftests don't create unique ebs for each dummy
|
||||
* root.
|
||||
*/
|
||||
if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &eb->fs_info->fs_state))
|
||||
return 0;
|
||||
/*
|
||||
* There are several call sites (backref walking, qgroup, and data
|
||||
* reloc) passing 0 as @root_owner, as they are not holding the
|
||||
* tree root. In that case, we can not do a reliable ownership check,
|
||||
* so just exit.
|
||||
*/
|
||||
if (root_owner == 0)
|
||||
return 0;
|
||||
/*
|
||||
* These trees use key.offset as their owner, our callers don't have
|
||||
* the extra capacity to pass key.offset here. So we just skip them.
|
||||
*/
|
||||
if (root_owner == BTRFS_TREE_LOG_OBJECTID ||
|
||||
root_owner == BTRFS_TREE_RELOC_OBJECTID)
|
||||
return 0;
|
||||
|
||||
if (!is_subvol) {
|
||||
/* For non-subvolume trees, the eb owner should match root owner */
|
||||
if (unlikely(root_owner != eb_owner)) {
|
||||
btrfs_crit(eb->fs_info,
|
||||
"corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu",
|
||||
btrfs_header_level(eb) == 0 ? "leaf" : "node",
|
||||
root_owner, btrfs_header_bytenr(eb), eb_owner,
|
||||
root_owner);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* For subvolume trees, owners can mismatch, but they should all belong
|
||||
* to subvolume trees.
|
||||
*/
|
||||
if (unlikely(is_subvol != is_fstree(eb_owner))) {
|
||||
btrfs_crit(eb->fs_info,
|
||||
"corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
|
||||
btrfs_header_level(eb) == 0 ? "leaf" : "node",
|
||||
root_owner, btrfs_header_bytenr(eb), eb_owner,
|
||||
BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -25,5 +25,6 @@ int btrfs_check_node(struct extent_buffer *node);
|
|||
|
||||
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk, u64 logical);
|
||||
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -333,7 +333,7 @@ static int process_one_buffer(struct btrfs_root *log,
|
|||
* pin down any logged extents, so we have to read the block.
|
||||
*/
|
||||
if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
|
||||
ret = btrfs_read_buffer(eb, gen, level, NULL);
|
||||
ret = btrfs_read_extent_buffer(eb, gen, level, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -894,8 +894,7 @@ update_inode:
|
|||
btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found);
|
||||
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
|
||||
out:
|
||||
if (inode)
|
||||
iput(inode);
|
||||
iput(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2575,7 +2574,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
|
|||
int i;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_read_buffer(eb, gen, level, NULL);
|
||||
ret = btrfs_read_extent_buffer(eb, gen, level, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -2786,7 +2785,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||
|
||||
path->slots[*level]++;
|
||||
if (wc->free) {
|
||||
ret = btrfs_read_buffer(next, ptr_gen,
|
||||
ret = btrfs_read_extent_buffer(next, ptr_gen,
|
||||
*level - 1, &first_key);
|
||||
if (ret) {
|
||||
free_extent_buffer(next);
|
||||
|
@ -2815,7 +2814,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||
free_extent_buffer(next);
|
||||
continue;
|
||||
}
|
||||
ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key);
|
||||
ret = btrfs_read_extent_buffer(next, ptr_gen, *level - 1, &first_key);
|
||||
if (ret) {
|
||||
free_extent_buffer(next);
|
||||
return ret;
|
||||
|
|
|
@ -164,24 +164,12 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
|
|||
*/
|
||||
enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
|
||||
{
|
||||
if (flags & BTRFS_BLOCK_GROUP_RAID10)
|
||||
return BTRFS_RAID_RAID10;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
|
||||
return BTRFS_RAID_RAID1;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
|
||||
return BTRFS_RAID_RAID1C3;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
|
||||
return BTRFS_RAID_RAID1C4;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_DUP)
|
||||
return BTRFS_RAID_DUP;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID0)
|
||||
return BTRFS_RAID_RAID0;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID5)
|
||||
return BTRFS_RAID_RAID5;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_RAID6)
|
||||
return BTRFS_RAID_RAID6;
|
||||
const u64 profile = (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK);
|
||||
|
||||
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
|
||||
if (!profile)
|
||||
return BTRFS_RAID_SINGLE;
|
||||
|
||||
return BTRFS_BG_FLAG_TO_INDEX(profile);
|
||||
}
|
||||
|
||||
const char *btrfs_bg_type_to_raid_name(u64 flags)
|
||||
|
@ -4062,13 +4050,6 @@ static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
|
|||
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
|
||||
return true;
|
||||
|
||||
if (fs_info->sectorsize < PAGE_SIZE &&
|
||||
bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
btrfs_err(fs_info,
|
||||
"RAID56 is not yet supported for sectorsize %u with page size %lu",
|
||||
fs_info->sectorsize, PAGE_SIZE);
|
||||
return false;
|
||||
}
|
||||
/* Profile is valid and does not have bits outside of the allowed set */
|
||||
if (alloc_profile_is_valid(bargs->target, 1) &&
|
||||
(bargs->target & ~allowed) == 0)
|
||||
|
@ -6312,7 +6293,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
|
|||
u64 offset;
|
||||
u64 stripe_offset;
|
||||
u64 stripe_nr;
|
||||
u64 stripe_len;
|
||||
u32 stripe_len;
|
||||
u64 raid56_full_stripe_start = (u64)-1;
|
||||
int data_stripes;
|
||||
|
||||
|
@ -6323,19 +6304,13 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
|
|||
offset = logical - em->start;
|
||||
/* Len of a stripe in a chunk */
|
||||
stripe_len = map->stripe_len;
|
||||
/* Stripe where this block falls in */
|
||||
stripe_nr = div64_u64(offset, stripe_len);
|
||||
/* Offset of stripe in the chunk */
|
||||
stripe_offset = stripe_nr * stripe_len;
|
||||
if (offset < stripe_offset) {
|
||||
btrfs_crit(fs_info,
|
||||
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
|
||||
stripe_offset, offset, em->start, logical, stripe_len);
|
||||
return -EINVAL;
|
||||
}
|
||||
/*
|
||||
* Stripe_nr is where this block falls in
|
||||
* stripe_offset is the offset of this block in its stripe.
|
||||
*/
|
||||
stripe_nr = div64_u64_rem(offset, stripe_len, &stripe_offset);
|
||||
ASSERT(stripe_offset < U32_MAX);
|
||||
|
||||
/* stripe_offset is the offset of this block in its stripe */
|
||||
stripe_offset = offset - stripe_offset;
|
||||
data_stripes = nr_data_stripes(map);
|
||||
|
||||
/* Only stripe based profiles needs to check against stripe length. */
|
||||
|
@ -6737,11 +6712,11 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
|
|||
bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
|
||||
(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
|
||||
dev->devid, bio->bi_iter.bi_size);
|
||||
bio_set_dev(bio, dev->bdev);
|
||||
|
||||
btrfs_bio_counter_inc_noblocked(fs_info);
|
||||
|
||||
btrfsic_submit_bio(bio);
|
||||
btrfsic_check_bio(bio);
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical)
|
||||
|
@ -6823,10 +6798,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (dev_nr < total_devs - 1)
|
||||
bio = btrfs_bio_clone(first_bio);
|
||||
else
|
||||
if (dev_nr < total_devs - 1) {
|
||||
bio = btrfs_bio_clone(dev->bdev, first_bio);
|
||||
} else {
|
||||
bio = first_bio;
|
||||
bio_set_dev(bio, dev->bdev);
|
||||
}
|
||||
|
||||
submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
|
||||
}
|
||||
|
@ -7359,7 +7336,6 @@ static int read_one_dev(struct extent_buffer *leaf,
|
|||
|
||||
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
struct btrfs_super_block *super_copy = fs_info->super_copy;
|
||||
struct extent_buffer *sb;
|
||||
struct btrfs_disk_key *disk_key;
|
||||
|
@ -7375,30 +7351,16 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
|
|||
struct btrfs_key key;
|
||||
|
||||
ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
|
||||
|
||||
/*
|
||||
* This will create extent buffer of nodesize, superblock size is
|
||||
* fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
|
||||
* overallocate but we can keep it as-is, only the first page is used.
|
||||
* We allocated a dummy extent, just to use extent buffer accessors.
|
||||
* There will be unused space after BTRFS_SUPER_INFO_SIZE, but
|
||||
* that's fine, we will not go beyond system chunk array anyway.
|
||||
*/
|
||||
sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
|
||||
root->root_key.objectid, 0);
|
||||
if (IS_ERR(sb))
|
||||
return PTR_ERR(sb);
|
||||
sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET);
|
||||
if (!sb)
|
||||
return -ENOMEM;
|
||||
set_extent_buffer_uptodate(sb);
|
||||
/*
|
||||
* The sb extent buffer is artificial and just used to read the system array.
|
||||
* set_extent_buffer_uptodate() call does not properly mark all it's
|
||||
* pages up-to-date when the page is larger: extent does not cover the
|
||||
* whole page and consequently check_page_uptodate does not find all
|
||||
* the page's extents up-to-date (the hole beyond sb),
|
||||
* write_extent_buffer then triggers a WARN_ON.
|
||||
*
|
||||
* Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
|
||||
* but sb spans only this function. Add an explicit SetPageUptodate call
|
||||
* to silence the warning eg. on PowerPC 64.
|
||||
*/
|
||||
if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
|
||||
SetPageUptodate(sb->pages[0]);
|
||||
|
||||
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
|
||||
array_size = btrfs_super_sys_array_size(super_copy);
|
||||
|
@ -7561,6 +7523,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
|
|||
struct btrfs_key found_key;
|
||||
int ret;
|
||||
int slot;
|
||||
int iter_ret = 0;
|
||||
u64 total_dev = 0;
|
||||
u64 last_ra_node = 0;
|
||||
|
||||
|
@ -7604,30 +7567,18 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
|
|||
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
|
||||
key.offset = 0;
|
||||
key.type = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
while (1) {
|
||||
struct extent_buffer *node;
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
struct extent_buffer *node = path->nodes[1];
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret == 0)
|
||||
continue;
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
break;
|
||||
}
|
||||
node = path->nodes[1];
|
||||
|
||||
if (node) {
|
||||
if (last_ra_node != node->start) {
|
||||
readahead_tree_node_children(node);
|
||||
last_ra_node = node->start;
|
||||
}
|
||||
}
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
if (found_key.type == BTRFS_DEV_ITEM_KEY) {
|
||||
struct btrfs_dev_item *dev_item;
|
||||
dev_item = btrfs_item_ptr(leaf, slot,
|
||||
|
@ -7652,7 +7603,11 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
|
|||
if (ret)
|
||||
goto error;
|
||||
}
|
||||
path->slots[0]++;
|
||||
}
|
||||
/* Catch error found during iteration */
|
||||
if (iter_ret < 0) {
|
||||
ret = iter_ret;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -7660,12 +7615,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	 * do another round of validation checks.
 	 */
 	if (total_dev != fs_info->fs_devices->total_devices) {
-		btrfs_err(fs_info,
-	   "super_num_devices %llu mismatch with num_devices %llu found here",
+		btrfs_warn(fs_info,
+"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit",
 			  btrfs_super_num_devices(fs_info->super_copy),
 			  total_dev);
-		ret = -EINVAL;
-		goto error;
+		fs_info->fs_devices->total_devices = total_dev;
+		btrfs_set_super_num_devices(fs_info->super_copy, total_dev);
 	}
 	if (btrfs_super_total_bytes(fs_info->super_copy) <
 	    fs_info->fs_devices->total_rw_bytes) {
|
||||
|
@ -8277,7 +8232,7 @@ bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
|
|||
|
||||
static int relocating_repair_kthread(void *data)
|
||||
{
|
||||
struct btrfs_block_group *cache = (struct btrfs_block_group *)data;
|
||||
struct btrfs_block_group *cache = data;
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
u64 target;
|
||||
int ret = 0;
|
||||
|
|
|
@ -17,17 +17,51 @@ extern struct mutex uuid_mutex;
|
|||
|
||||
#define BTRFS_STRIPE_LEN SZ_64K
|
||||
|
||||
/* Used by sanity check for btrfs_raid_types. */
|
||||
#define const_ffs(n) (__builtin_ctzll(n) + 1)
|
||||
|
||||
/*
|
||||
* The conversion from BTRFS_BLOCK_GROUP_* bits to btrfs_raid_type requires
|
||||
* RAID0 always to be the lowest profile bit.
|
||||
* Although it's part of on-disk format and should never change, do extra
|
||||
* compile-time sanity checks.
|
||||
*/
|
||||
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
|
||||
const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
|
||||
static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
|
||||
ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
|
||||
|
||||
/* ilog2() can handle both constants and variables */
|
||||
#define BTRFS_BG_FLAG_TO_INDEX(profile) \
|
||||
ilog2((profile) >> (ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1))
|
||||
|
||||
enum btrfs_raid_types {
|
||||
/* SINGLE is the special one as it doesn't have on-disk bit. */
|
||||
BTRFS_RAID_SINGLE = 0,
|
||||
|
||||
BTRFS_RAID_RAID0 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID0),
|
||||
BTRFS_RAID_RAID1 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1),
|
||||
BTRFS_RAID_DUP = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_DUP),
|
||||
BTRFS_RAID_RAID10 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID10),
|
||||
BTRFS_RAID_RAID5 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID5),
|
||||
BTRFS_RAID_RAID6 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID6),
|
||||
BTRFS_RAID_RAID1C3 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C3),
|
||||
BTRFS_RAID_RAID1C4 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C4),
|
||||
|
||||
BTRFS_NR_RAID_TYPES
|
||||
};
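A worked example of the BTRFS_BG_FLAG_TO_INDEX() mapping above; the block group flag bit positions are quoted from the uapi definitions and are assumptions of this sketch rather than part of the hunk:

	/*
	 * BTRFS_BLOCK_GROUP_RAID0   = 1 << 3  -> ilog2(0x8   >> 2) = 1
	 * BTRFS_BLOCK_GROUP_RAID1   = 1 << 4  -> ilog2(0x10  >> 2) = 2
	 * BTRFS_BLOCK_GROUP_RAID1C4 = 1 << 10 -> ilog2(0x400 >> 2) = 8
	 * SINGLE has no on-disk bit and keeps index 0, so the enum values line
	 * up with the btrfs_raid_array[] table.
	 */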
|
||||
|
||||
struct btrfs_io_geometry {
|
||||
/* remaining bytes before crossing a stripe */
|
||||
u64 len;
|
||||
/* offset of logical address in chunk */
|
||||
u64 offset;
|
||||
/* length of single IO stripe */
|
||||
u64 stripe_len;
|
||||
u32 stripe_len;
|
||||
/* offset of address in stripe */
|
||||
u32 stripe_offset;
|
||||
/* number of stripe where address falls */
|
||||
u64 stripe_nr;
|
||||
/* offset of address in stripe */
|
||||
u64 stripe_offset;
|
||||
/* offset of raid56 stripe into the chunk */
|
||||
u64 raid56_stripe_offset;
|
||||
};
|
||||
|
@ -430,7 +464,7 @@ struct map_lookup {
|
|||
u64 type;
|
||||
int io_align;
|
||||
int io_width;
|
||||
u64 stripe_len;
|
||||
u32 stripe_len;
|
||||
int num_stripes;
|
||||
int sub_stripes;
|
||||
int verified_stripes; /* For mount time dev extent verification */
|
||||
|
|
|
@ -272,10 +272,12 @@ out:
|
|||
|
||||
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
||||
{
|
||||
struct btrfs_key found_key;
|
||||
struct btrfs_key key;
|
||||
struct inode *inode = d_inode(dentry);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_path *path;
|
||||
int iter_ret = 0;
|
||||
int ret = 0;
|
||||
size_t total_size = 0, size_left = size;
|
||||
|
||||
|
@ -294,44 +296,23 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
|||
path->reada = READA_FORWARD;
|
||||
|
||||
/* search for our xattrs */
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
while (1) {
|
||||
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key found_key;
|
||||
u32 item_size;
|
||||
u32 cur;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
|
||||
/* this is where we start walking through the path */
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
/*
|
||||
* if we've reached the last slot in this leaf we need
|
||||
* to go to the next leaf and reset everything
|
||||
*/
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
else if (ret > 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
/* check to make sure this item is what we want */
|
||||
if (found_key.objectid != key.objectid)
|
||||
break;
|
||||
if (found_key.type > BTRFS_XATTR_ITEM_KEY)
|
||||
break;
|
||||
if (found_key.type < BTRFS_XATTR_ITEM_KEY)
|
||||
goto next_item;
|
||||
continue;
|
||||
|
||||
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
|
||||
item_size = btrfs_item_size(leaf, slot);
|
||||
|
@ -351,8 +332,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
|||
goto next;
|
||||
|
||||
if (!buffer || (name_len + 1) > size_left) {
|
||||
ret = -ERANGE;
|
||||
goto err;
|
||||
iter_ret = -ERANGE;
|
||||
break;
|
||||
}
|
||||
|
||||
read_extent_buffer(leaf, buffer, name_ptr, name_len);
|
||||
|
@ -364,12 +345,13 @@ next:
|
|||
cur += this_len;
|
||||
di = (struct btrfs_dir_item *)((char *)di + this_len);
|
||||
}
|
||||
next_item:
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = total_size;
|
||||
|
||||
err:
|
||||
if (iter_ret < 0)
|
||||
ret = iter_ret;
|
||||
else
|
||||
ret = total_size;
|
||||
|
||||
btrfs_free_path(path);
|
||||
|
||||
return ret;
|
||||
|
|
 fs/btrfs/zoned.c | 225
|
@@ -51,11 +51,13 @@
 #define BTRFS_MIN_ACTIVE_ZONES		(BTRFS_SUPER_MIRROR_MAX + 5)

 /*
- * Maximum supported zone size. Currently, SMR disks have a zone size of
- * 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
- * expect the zone size to become larger than 8GiB in the near future.
+ * Minimum / maximum supported zone size. Currently, SMR disks have a zone
+ * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range.
+ * We do not expect the zone size to become larger than 8GiB or smaller than
+ * 4MiB in the near future.
  */
 #define BTRFS_MAX_ZONE_SIZE		SZ_8G
+#define BTRFS_MIN_ZONE_SIZE		SZ_4M

 #define SUPER_INFO_SECTORS	((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)

@@ -401,6 +403,13 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 			  zone_info->zone_size, BTRFS_MAX_ZONE_SIZE);
 		ret = -EINVAL;
 		goto out;
+	} else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) {
+		btrfs_err_in_rcu(fs_info,
+		"zoned: %s: zone size %llu smaller than supported minimum %u",
+				 rcu_str_deref(device->name),
+				 zone_info->zone_size, BTRFS_MIN_ZONE_SIZE);
+		ret = -EINVAL;
+		goto out;
 	}

 	nr_sectors = bdev_nr_sectors(bdev);
|
||||
|
@ -1835,7 +1844,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
|||
}
|
||||
|
||||
/* No space left */
|
||||
if (block_group->alloc_offset == block_group->zone_capacity) {
|
||||
if (btrfs_zoned_bg_is_full(block_group)) {
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@@ -1872,20 +1881,14 @@ out_unlock:
 	return ret;
 }
 
-int btrfs_zone_finish(struct btrfs_block_group *block_group)
+static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct map_lookup *map;
-	struct btrfs_device *device;
-	u64 physical;
+	bool need_zone_finish;
 	int ret = 0;
 	int i;
 
-	if (!btrfs_is_zoned(fs_info))
-		return 0;
-
-	map = block_group->physical_map;
-
 	spin_lock(&block_group->lock);
 	if (!block_group->zone_is_active) {
 		spin_unlock(&block_group->lock);
@@ -1895,39 +1898,55 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	/* Check if we have unwritten allocated space */
 	if ((block_group->flags &
 	     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
-	    block_group->alloc_offset > block_group->meta_write_pointer) {
+	    block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
 		spin_unlock(&block_group->lock);
 		return -EAGAIN;
 	}
-	spin_unlock(&block_group->lock);
 
-	ret = btrfs_inc_block_group_ro(block_group, false);
-	if (ret)
-		return ret;
-
-	/* Ensure all writes in this block group finish */
-	btrfs_wait_block_group_reservations(block_group);
-	/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
-	btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
-				 block_group->length);
-
-	spin_lock(&block_group->lock);
-
 	/*
-	 * Bail out if someone already deactivated the block group, or
-	 * allocated space is left in the block group.
+	 * If we are sure that the block group is full (= no more room left for
+	 * new allocation) and the IO for the last usable block is completed, we
+	 * don't need to wait for the other IOs. This holds because we ensure
+	 * the sequential IO submissions using the ZONE_APPEND command for data
+	 * and block_group->meta_write_pointer for metadata.
 	 */
-	if (!block_group->zone_is_active) {
+	if (!fully_written) {
 		spin_unlock(&block_group->lock);
-		btrfs_dec_block_group_ro(block_group);
-		return 0;
-	}
-
-	if (block_group->reserved) {
-		spin_unlock(&block_group->lock);
-		btrfs_dec_block_group_ro(block_group);
-		return -EAGAIN;
+
+		ret = btrfs_inc_block_group_ro(block_group, false);
+		if (ret)
+			return ret;
+
+		/* Ensure all writes in this block group finish */
+		btrfs_wait_block_group_reservations(block_group);
+		/* No need to wait for NOCOW writers. Zoned mode does not allow that */
+		btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
+					 block_group->length);
+
+		spin_lock(&block_group->lock);
+
+		/*
+		 * Bail out if someone already deactivated the block group, or
+		 * allocated space is left in the block group.
+		 */
+		if (!block_group->zone_is_active) {
+			spin_unlock(&block_group->lock);
+			btrfs_dec_block_group_ro(block_group);
+			return 0;
+		}
+
+		if (block_group->reserved) {
+			spin_unlock(&block_group->lock);
+			btrfs_dec_block_group_ro(block_group);
+			return -EAGAIN;
+		}
 	}
 
+	/*
+	 * The block group is not fully allocated, so not fully written yet. We
+	 * need to send ZONE_FINISH command to free up an active zone.
+	 */
+	need_zone_finish = !btrfs_zoned_bg_is_full(block_group);
+
 	block_group->zone_is_active = 0;
 	block_group->alloc_offset = block_group->zone_capacity;
@@ -1936,24 +1955,29 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	btrfs_clear_data_reloc_bg(block_group);
 	spin_unlock(&block_group->lock);
 
+	map = block_group->physical_map;
 	for (i = 0; i < map->num_stripes; i++) {
-		device = map->stripes[i].dev;
-		physical = map->stripes[i].physical;
+		struct btrfs_device *device = map->stripes[i].dev;
+		const u64 physical = map->stripes[i].physical;
 
 		if (device->zone_info->max_active_zones == 0)
 			continue;
 
-		ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
-				       physical >> SECTOR_SHIFT,
-				       device->zone_info->zone_size >> SECTOR_SHIFT,
-				       GFP_NOFS);
+		if (need_zone_finish) {
+			ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+					       physical >> SECTOR_SHIFT,
+					       device->zone_info->zone_size >> SECTOR_SHIFT,
+					       GFP_NOFS);
 
-		if (ret)
-			return ret;
+			if (ret)
+				return ret;
+		}
 
 		btrfs_dev_clear_active_zone(device, physical);
 	}
-	btrfs_dec_block_group_ro(block_group);
+
+	if (!fully_written)
+		btrfs_dec_block_group_ro(block_group);
 
 	spin_lock(&fs_info->zone_active_bgs_lock);
 	ASSERT(!list_empty(&block_group->active_bg_list));
@@ -1966,6 +1990,14 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	return 0;
 }
 
+int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	if (!btrfs_is_zoned(block_group->fs_info))
+		return 0;
+
+	return do_zone_finish(block_group, false);
+}
+
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 {
 	struct btrfs_fs_info *fs_info = fs_devices->fs_info;
@@ -1997,9 +2029,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
 {
 	struct btrfs_block_group *block_group;
-	struct map_lookup *map;
-	struct btrfs_device *device;
-	u64 physical;
+	u64 min_alloc_bytes;
 
 	if (!btrfs_is_zoned(fs_info))
 		return;
@@ -2007,44 +2037,54 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
 	block_group = btrfs_lookup_block_group(fs_info, logical);
 	ASSERT(block_group);
 
-	if (logical + length < block_group->start + block_group->zone_capacity)
+	/* No MIXED_BG on zoned btrfs. */
+	if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+		min_alloc_bytes = fs_info->sectorsize;
+	else
+		min_alloc_bytes = fs_info->nodesize;
+
+	/* Bail out if we can allocate more data from this block group. */
+	if (logical + length + min_alloc_bytes <=
+	    block_group->start + block_group->zone_capacity)
 		goto out;
 
-	spin_lock(&block_group->lock);
-
-	if (!block_group->zone_is_active) {
-		spin_unlock(&block_group->lock);
-		goto out;
-	}
-
-	block_group->zone_is_active = 0;
-	/* We should have consumed all the free space */
-	ASSERT(block_group->alloc_offset == block_group->zone_capacity);
-	ASSERT(block_group->free_space_ctl->free_space == 0);
-	btrfs_clear_treelog_bg(block_group);
-	btrfs_clear_data_reloc_bg(block_group);
-	spin_unlock(&block_group->lock);
-
-	map = block_group->physical_map;
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (!device->zone_info->max_active_zones)
-		goto out;
-
-	btrfs_dev_clear_active_zone(device, physical);
-
-	spin_lock(&fs_info->zone_active_bgs_lock);
-	ASSERT(!list_empty(&block_group->active_bg_list));
-	list_del_init(&block_group->active_bg_list);
-	spin_unlock(&fs_info->zone_active_bgs_lock);
-
-	btrfs_put_block_group(block_group);
+	do_zone_finish(block_group, true);
 
 out:
 	btrfs_put_block_group(block_group);
 }
 
+static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+{
+	struct btrfs_block_group *bg =
+		container_of(work, struct btrfs_block_group, zone_finish_work);
+
+	wait_on_extent_buffer_writeback(bg->last_eb);
+	free_extent_buffer(bg->last_eb);
+	btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length);
+	btrfs_put_block_group(bg);
+}
+
+void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+				   struct extent_buffer *eb)
+{
+	if (!bg->seq_zone || eb->start + eb->len * 2 <= bg->start + bg->zone_capacity)
+		return;
+
+	if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) {
+		btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing",
+			  bg->start);
+		return;
+	}
+
+	/* For the work */
+	btrfs_get_block_group(bg);
+	atomic_inc(&eb->refs);
+	bg->last_eb = eb;
+	INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
+	queue_work(system_unbound_wq, &bg->zone_finish_work);
+}
+
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -2072,3 +2112,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
+
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_device *device;
+	u64 used = 0;
+	u64 total = 0;
+	u64 factor;
+
+	ASSERT(btrfs_is_zoned(fs_info));
+
+	if (fs_info->bg_reclaim_threshold == 0)
+		return false;
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (!device->bdev)
+			continue;
+
+		total += device->disk_total_bytes;
+		used += device->bytes_used;
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	factor = div64_u64(used * 100, total);
+	return factor >= fs_info->bg_reclaim_threshold;
+}
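A quick worked example of the arithmetic in btrfs_zoned_should_reclaim(): the function compares the percentage of raw device space in use against fs_info->bg_reclaim_threshold (default 75, tunable via sysfs per the cover letter). The numbers below are hypothetical and the snippet is plain userspace C for illustration only, mirroring the kernel's div64_u64(used * 100, total) check.

```c
/* Illustrative only: the reclaim-threshold percentage check with sample numbers. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool should_reclaim(uint64_t used, uint64_t total, uint64_t threshold)
{
	return total != 0 && (used * 100 / total) >= threshold;
}

int main(void)
{
	const uint64_t gib = 1024ULL * 1024 * 1024;

	/* 750 GiB used of 1024 GiB -> factor 73, below the default 75: no reclaim. */
	printf("%d\n", should_reclaim(750 * gib, 1024 * gib, 75));
	/* 800 GiB used of 1024 GiB -> factor 78, at or above 75: reclaim runs. */
	printf("%d\n", should_reclaim(800 * gib, 1024 * gib, 75));
	return 0;
}
```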
fs/btrfs/zoned.h

@@ -10,11 +10,7 @@
 #include "block-group.h"
 #include "btrfs_inode.h"
 
-/*
- * Block groups with more than this value (percents) of unusable space will be
- * scheduled for background reclaim.
- */
-#define BTRFS_DEFAULT_RECLAIM_THRESH 75
+#define BTRFS_DEFAULT_RECLAIM_THRESH			(75)
 
 struct btrfs_zoned_device_info {
 	/*

@@ -76,8 +72,11 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group);
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
 void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
 			     u64 length);
+void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+				   struct extent_buffer *eb);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)

@@ -233,9 +232,17 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
 static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
 					   u64 logical, u64 length) { }
 
+static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+						 struct extent_buffer *eb) { }
+
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
 
+static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	return false;
+}
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

@@ -370,4 +377,10 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
 	mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
 }
 
+static inline bool btrfs_zoned_bg_is_full(const struct btrfs_block_group *bg)
+{
+	ASSERT(btrfs_is_zoned(bg->fs_info));
+	return (bg->alloc_offset == bg->zone_capacity);
+}
+
 #endif
fs/btrfs/zstd.c

@@ -93,22 +93,26 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
 
 void zstd_free_workspace(struct list_head *ws);
 struct list_head *zstd_alloc_workspace(unsigned int level);
 
-/*
- * zstd_reclaim_timer_fn - reclaim timer
+/**
+ * Timer callback to free unused workspaces.
+ *
  * @t: timer
  *
  * This scans the lru_list and attempts to reclaim any workspace that hasn't
  * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
+ *
+ * The context is softirq and does not need the _bh locking primitives.
  */
 static void zstd_reclaim_timer_fn(struct timer_list *timer)
 {
 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
 	struct list_head *pos, *next;
 
-	spin_lock_bh(&wsm.lock);
+	spin_lock(&wsm.lock);
 
 	if (list_empty(&wsm.lru_list)) {
-		spin_unlock_bh(&wsm.lock);
+		spin_unlock(&wsm.lock);
 		return;
 	}

@@ -137,7 +141,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 	if (!list_empty(&wsm.lru_list))
 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
 
-	spin_unlock_bh(&wsm.lock);
+	spin_unlock(&wsm.lock);
 }
 
 /*
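The lock-primitive change above follows the usual rule: _bh variants are only required where process context must exclude softirq context. The timer callback itself already runs in softirq, so it can take the plain spin_lock(), while the process-context users of wsm.lock keep spin_lock_bh() so the timer cannot run on the same CPU while they hold the lock. A schematic sketch of that split, not taken from the patch and with invented names:

```c
/* Schematic only: one lock shared by process context and a timer (softirq). */
#include <linux/spinlock.h>
#include <linux/timer.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_timer_fn(struct timer_list *t)
{
	/* Softirq context: the plain lock is sufficient here. */
	spin_lock(&demo_lock);
	/* ... reclaim work ... */
	spin_unlock(&demo_lock);
}

static void demo_process_path(void)
{
	/* Process context must still block softirqs while holding the lock. */
	spin_lock_bh(&demo_lock);
	/* ... touch the shared state ... */
	spin_unlock_bh(&demo_lock);
}
```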
fs/erofs/data.c

@@ -399,7 +399,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
 		if (!err)
 			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
-					    NULL, 0, 0);
+					    NULL, 0, NULL, 0);
 		if (err < 0)
 			return err;
 	}
fs/ext4/file.c

@@ -76,7 +76,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return generic_file_read_iter(iocb, to);
 	}
 
-	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0);
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, NULL, 0);
 	inode_unlock_shared(inode);
 
 	file_accessed(iocb->ki_filp);

@@ -565,7 +565,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
 			   (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
-			   0);
+			   NULL, 0);
 	if (ret == -ENOTBLK)
 		ret = 0;
fs/f2fs/file.c

@@ -4308,7 +4308,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	inc_page_count(sbi, F2FS_DIO_READ);
 	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
-			     &f2fs_iomap_dio_read_ops, 0, 0);
+			     &f2fs_iomap_dio_read_ops, 0, NULL, 0);
 	if (IS_ERR_OR_NULL(dio)) {
 		ret = PTR_ERR_OR_ZERO(dio);
 		if (ret != -EIOCBQUEUED)

@@ -4526,7 +4526,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 	if (pos + count > inode->i_size)
 		dio_flags |= IOMAP_DIO_FORCE_WAIT;
 	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
-			     &f2fs_iomap_dio_write_ops, dio_flags, 0);
+			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
 	if (IS_ERR_OR_NULL(dio)) {
 		ret = PTR_ERR_OR_ZERO(dio);
 		if (ret == -ENOTBLK)
fs/gfs2/file.c

@@ -835,7 +835,7 @@ retry:
 	pagefault_disable();
 	to->nofault = true;
 	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
-			   IOMAP_DIO_PARTIAL, read);
+			   IOMAP_DIO_PARTIAL, NULL, read);
 	to->nofault = false;
 	pagefault_enable();
 	if (ret <= 0 && ret != -EFAULT)

@@ -898,7 +898,7 @@ retry:
 
 	from->nofault = true;
 	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
-			   IOMAP_DIO_PARTIAL, written);
+			   IOMAP_DIO_PARTIAL, NULL, written);
 	from->nofault = false;
 	if (ret <= 0) {
 		if (ret == -ENOTBLK)
fs/iomap/direct-io.c

@@ -51,6 +51,15 @@ struct iomap_dio {
 	};
 };
 
+static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
+		struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf)
+{
+	if (dio->dops && dio->dops->bio_set)
+		return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf,
+					GFP_KERNEL, dio->dops->bio_set);
+	return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL);
+}
+
 static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, struct bio *bio, loff_t pos)
 {

@@ -145,7 +154,7 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
 	cmpxchg(&dio->error, 0, ret);
 }
 
-static void iomap_dio_bio_end_io(struct bio *bio)
+void iomap_dio_bio_end_io(struct bio *bio)
 {
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);

@@ -177,16 +186,16 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		bio_put(bio);
 	}
 }
+EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
 
 static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 		loff_t pos, unsigned len)
 {
 	struct inode *inode = file_inode(dio->iocb->ki_filp);
 	struct page *page = ZERO_PAGE(0);
-	int flags = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
 
-	bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL);
+	bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
 	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 				  GFP_KERNEL);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);

@@ -311,7 +320,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 			goto out;
 		}
 
-		bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
+		bio = iomap_dio_alloc_bio(iter, dio, nr_pages, bio_opf);
 		fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 					  GFP_KERNEL);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);

@@ -474,7 +483,7 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter,
 struct iomap_dio *
 __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags, size_t done_before)
+		unsigned int dio_flags, void *private, size_t done_before)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);

@@ -483,6 +492,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		.pos		= iocb->ki_pos,
 		.len		= iov_iter_count(iter),
 		.flags		= IOMAP_DIRECT,
+		.private	= private,
 	};
 	loff_t end = iomi.pos + iomi.len - 1, ret = 0;
 	bool wait_for_completion =

@@ -672,11 +682,12 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw);
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags, size_t done_before)
+		unsigned int dio_flags, void *private, size_t done_before)
 {
 	struct iomap_dio *dio;
 
-	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before);
+	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,
+			     done_before);
 	if (IS_ERR_OR_NULL(dio))
 		return PTR_ERR_OR_ZERO(dio);
 	return iomap_dio_complete(dio);
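Taken together, the new bio_set hook and the export of iomap_dio_bio_end_io() let a filesystem embed its own per-bio state in direct I/O: it declares a bio_set whose front pad holds the private structure, points iomap_dio_ops->bio_set at it, attaches its own ->bi_end_io in ->submit_io, and finishes that end_io by handing the bio back to iomap. Btrfs is the in-tree user in this series; the sketch below uses invented names and is only an outline of the pattern, not the btrfs code.

```c
/* Sketch only: per-bio private data for iomap direct I/O (names are illustrative). */
struct demo_dio_private {
	u64 logical;		/* whatever per-bio state the fs wants to carry */
	struct bio bio;		/* must be last: the bio is allocated at this offset */
};

/* bioset_init(&demo_dio_bioset, pool_size, offsetof(struct demo_dio_private, bio), flags)
 * would be called once at module init; pool_size/flags left open here. */
static struct bio_set demo_dio_bioset;

static void demo_dio_end_io(struct bio *bio)
{
	struct demo_dio_private *dip = container_of(bio, struct demo_dio_private, bio);

	/* ... filesystem post-processing using dip ... */

	/* Hand the bio back to iomap for the generic dio completion. */
	iomap_dio_bio_end_io(bio);
}

static void demo_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
			       loff_t file_offset)
{
	struct demo_dio_private *dip = container_of(bio, struct demo_dio_private, bio);

	/* Illustrative mapping from file offset to on-disk address. */
	dip->logical = iter->iomap.addr + file_offset - iter->iomap.offset;
	bio->bi_end_io = demo_dio_end_io;
	submit_bio(bio);
}

static const struct iomap_dio_ops demo_dio_ops = {
	.submit_io	= demo_dio_submit_io,
	.bio_set	= &demo_dio_bioset,
};
```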
fs/xfs/xfs_file.c

@@ -225,7 +225,7 @@ xfs_file_dio_read(
 	ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
 	if (ret)
 		return ret;
-	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0);
+	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	return ret;

@@ -534,7 +534,7 @@ xfs_file_dio_write_aligned(
 	}
 	trace_xfs_file_direct_write(iocb, from);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, 0, 0);
+			   &xfs_dio_write_ops, 0, NULL, 0);
 out_unlock:
 	if (iolock)
 		xfs_iunlock(ip, iolock);

@@ -612,7 +612,7 @@ retry_exclusive:
 
 	trace_xfs_file_direct_write(iocb, from);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, flags, 0);
+			   &xfs_dio_write_ops, flags, NULL, 0);
 
 	/*
 	 * Retry unaligned I/O with exclusive blocking semantics if the DIO
fs/zonefs/super.c

@@ -900,7 +900,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 		ret = zonefs_file_dio_append(iocb, from);
 	else
 		ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
-				   &zonefs_write_dio_ops, 0, 0);
+				   &zonefs_write_dio_ops, 0, NULL, 0);
 	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
 	    (ret > 0 || ret == -EIOCBQUEUED)) {
 		if (ret > 0)

@@ -1042,7 +1042,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		}
 		file_accessed(iocb->ki_filp);
 		ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
-				   &zonefs_read_dio_ops, 0, 0);
+				   &zonefs_read_dio_ops, 0, NULL, 0);
 	} else {
 		ret = generic_file_read_iter(iocb, to);
 		if (ret == -EIO)
include/linux/fs.h

@@ -1708,6 +1708,11 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
 #define __sb_writers_release(sb, lev)	\
 	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
 
+static inline bool sb_write_started(const struct super_block *sb)
+{
+	return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
+}
+
 /**
  * sb_end_write - drop write access to a superblock
  * @sb: the super we wrote to
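As the cover letter says, the new helper is meant for assertions: it reports whether an sb_start_write() section is active, and since it is built on lockdep_is_held_type() it is only meaningful with lockdep enabled. A hedged example of the intended use, with an invented function name:

```c
/* Illustrative assertion: this path must only run inside sb_start_write()/sb_end_write(). */
static void demo_modify_fs_metadata(struct inode *inode)
{
	WARN_ON_ONCE(!sb_write_started(inode->i_sb));

	/* ... modifications that rely on freeze protection being held ... */
}
```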
include/linux/iomap.h

@@ -188,6 +188,7 @@ struct iomap_iter {
 	unsigned flags;
 	struct iomap iomap;
 	struct iomap srcmap;
+	void *private;
 };
 
 int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);

@@ -320,6 +321,16 @@ struct iomap_dio_ops {
 		      unsigned flags);
 	void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
 		          loff_t file_offset);
+
+	/*
+	 * Filesystems wishing to attach private information to a direct io bio
+	 * must provide a ->submit_io method that attaches the additional
+	 * information to the bio and changes the ->bi_end_io callback to a
+	 * custom function. This function should, at a minimum, perform any
+	 * relevant post-processing of the bio and end with a call to
+	 * iomap_dio_bio_end_io.
+	 */
+	struct bio_set *bio_set;
 };
 
 /*

@@ -344,11 +355,12 @@ struct iomap_dio_ops {
 
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags, size_t done_before);
+		unsigned int dio_flags, void *private, size_t done_before);
 struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags, size_t done_before);
+		unsigned int dio_flags, void *private, size_t done_before);
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
+void iomap_dio_bio_end_io(struct bio *bio);
 
 #ifdef CONFIG_SWAP
 struct file;
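The extra void *private argument is simply stored in struct iomap_iter, so whatever the caller hands to iomap_dio_rw()/__iomap_dio_rw() is reachable as iter->private for the duration of that I/O, for example from ->submit_io, which receives the iter directly. A short sketch of a caller and callback pair; all names here are invented, only the signatures come from the hunks above.

```c
/* Sketch only: hand per-call state to the dio machinery via the new argument. */
struct demo_write_ctx {
	u64 reserved_bytes;	/* illustrative per-I/O state */
};

static void demo_submit_io(const struct iomap_iter *iter, struct bio *bio,
			   loff_t file_offset)
{
	struct demo_write_ctx *ctx = iter->private;	/* the pointer passed below */

	/* ... use ctx, then submit ... */
	submit_bio(bio);
}

static const struct iomap_dio_ops demo_dio_ops = {
	.submit_io = demo_submit_io,
};

static ssize_t demo_dio_write(struct kiocb *iocb, struct iov_iter *from,
			      const struct iomap_ops *ops)
{
	struct demo_write_ctx ctx = { 0 };

	/* New in this series: the sixth argument travels as iomap_iter::private. */
	return iomap_dio_rw(iocb, from, ops, &demo_dio_ops, 0, &ctx, 0);
}
```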
include/trace/events/btrfs.h

@@ -24,7 +24,7 @@ struct btrfs_free_cluster;
 struct map_lookup;
 struct extent_buffer;
 struct btrfs_work;
-struct __btrfs_workqueue;
+struct btrfs_workqueue;
 struct btrfs_qgroup_extent_record;
 struct btrfs_qgroup;
 struct extent_io_tree;

@@ -1457,42 +1457,36 @@ DEFINE_EVENT(btrfs__work, btrfs_ordered_sched,
 	TP_ARGS(work)
 );
 
-DECLARE_EVENT_CLASS(btrfs__workqueue,
+DECLARE_EVENT_CLASS(btrfs_workqueue,
 
-	TP_PROTO(const struct __btrfs_workqueue *wq,
-		 const char *name, int high),
+	TP_PROTO(const struct btrfs_workqueue *wq, const char *name),
 
-	TP_ARGS(wq, name, high),
+	TP_ARGS(wq, name),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	const void *,	wq	)
 		__string(	name,	name		)
-		__field(	int,	high		)
 	),
 
 	TP_fast_assign_btrfs(btrfs_workqueue_owner(wq),
 		__entry->wq		= wq;
 		__assign_str(name, name);
-		__entry->high		= high;
 	),
 
-	TP_printk_btrfs("name=%s%s wq=%p", __get_str(name),
-		  __print_flags(__entry->high, "",
-				{(WQ_HIGHPRI),	"-high"}),
+	TP_printk_btrfs("name=%s wq=%p", __get_str(name),
 		  __entry->wq)
 );
 
-DEFINE_EVENT(btrfs__workqueue, btrfs_workqueue_alloc,
+DEFINE_EVENT(btrfs_workqueue, btrfs_workqueue_alloc,
 
-	TP_PROTO(const struct __btrfs_workqueue *wq,
-		 const char *name, int high),
+	TP_PROTO(const struct btrfs_workqueue *wq, const char *name),
 
-	TP_ARGS(wq, name, high)
+	TP_ARGS(wq, name)
 );
 
-DECLARE_EVENT_CLASS(btrfs__workqueue_done,
+DECLARE_EVENT_CLASS(btrfs_workqueue_done,
 
-	TP_PROTO(const struct __btrfs_workqueue *wq),
+	TP_PROTO(const struct btrfs_workqueue *wq),
 
 	TP_ARGS(wq),
 

@@ -1507,9 +1501,9 @@ DECLARE_EVENT_CLASS(btrfs__workqueue_done,
 	TP_printk_btrfs("wq=%p", __entry->wq)
 );
 
-DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
+DEFINE_EVENT(btrfs_workqueue_done, btrfs_workqueue_destroy,
 
-	TP_PROTO(const struct __btrfs_workqueue *wq),
+	TP_PROTO(const struct btrfs_workqueue *wq),
 
 	TP_ARGS(wq)
 );
include/uapi/linux/btrfs_tree.h

@@ -880,19 +880,6 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
 					 BTRFS_SPACE_INFO_GLOBAL_RSV)
 
-enum btrfs_raid_types {
-	BTRFS_RAID_RAID10,
-	BTRFS_RAID_RAID1,
-	BTRFS_RAID_DUP,
-	BTRFS_RAID_RAID0,
-	BTRFS_RAID_SINGLE,
-	BTRFS_RAID_RAID5,
-	BTRFS_RAID_RAID6,
-	BTRFS_RAID_RAID1C3,
-	BTRFS_RAID_RAID1C4,
-	BTRFS_NR_RAID_TYPES
-};
-
 #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
 					 BTRFS_BLOCK_GROUP_SYSTEM |  \
 					 BTRFS_BLOCK_GROUP_METADATA)