Btrfs: introduce btrfs_{start, end}_nocow_write() for each subvolume

If the snapshot creation happened after the nocow write but before the dirty
data flush, we would fail to flush the dirty data because of no space.

So we must keep track of when those nocow write operations start and when they
end, if there are nocow writers, the snapshot creators must wait. In order
to implement this function, I introduce btrfs_{start, end}_nocow_write(),
which is similar to mnt_{want,drop}_write().

These two functions are only used for nocow file write operations.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
This commit is contained in:
Miao Xie 2014-03-06 13:38:19 +08:00 committed by Josef Bacik
parent 52483bc26f
commit 8257b2dc3c
5 changed files with 133 additions and 10 deletions

View File

@ -1681,6 +1681,11 @@ struct btrfs_fs_info {
unsigned int update_uuid_tree_gen:1; unsigned int update_uuid_tree_gen:1;
}; };
struct btrfs_subvolume_writers {
struct percpu_counter counter;
wait_queue_head_t wait;
};
/* /*
* in ram representation of the tree. extent_root is used for all allocations * in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root. * and for the extent tree extent_root root.
@ -1829,6 +1834,8 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS * manipulation with the read-only status via SUBVOL_SETFLAGS
*/ */
int send_in_progress; int send_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshoted;
}; };
struct btrfs_ioctl_defrag_range_args { struct btrfs_ioctl_defrag_range_args {
@ -3353,6 +3360,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info); struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags); int __get_raid_index(u64 flags);
int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */ /* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot); int level, int *slot);

View File

@ -1149,6 +1149,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} }
} }
static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
{
struct btrfs_subvolume_writers *writers;
int ret;
writers = kmalloc(sizeof(*writers), GFP_NOFS);
if (!writers)
return ERR_PTR(-ENOMEM);
ret = percpu_counter_init(&writers->counter, 0);
if (ret < 0) {
kfree(writers);
return ERR_PTR(ret);
}
init_waitqueue_head(&writers->wait);
return writers;
}
static void
btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
{
percpu_counter_destroy(&writers->counter);
kfree(writers);
}
static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root, u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info, struct btrfs_fs_info *fs_info,
@ -1205,6 +1231,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_batch, 0); atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0); atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1); atomic_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
root->log_transid = 0; root->log_transid = 0;
root->log_transid_committed = -1; root->log_transid_committed = -1;
root->last_log_commit = 0; root->last_log_commit = 0;
@ -1502,6 +1529,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root) int btrfs_init_fs_root(struct btrfs_root *root)
{ {
int ret; int ret;
struct btrfs_subvolume_writers *writers;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
@ -1511,6 +1539,13 @@ int btrfs_init_fs_root(struct btrfs_root *root)
goto fail; goto fail;
} }
writers = btrfs_alloc_subvolume_writers();
if (IS_ERR(writers)) {
ret = PTR_ERR(writers);
goto fail;
}
root->subv_writers = writers;
btrfs_init_free_ino_ctl(root); btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex); mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock); spin_lock_init(&root->cache_lock);
@ -1518,8 +1553,11 @@ int btrfs_init_fs_root(struct btrfs_root *root)
ret = get_anon_bdev(&root->anon_dev); ret = get_anon_bdev(&root->anon_dev);
if (ret) if (ret)
goto fail; goto free_writers;
return 0; return 0;
free_writers:
btrfs_free_subvolume_writers(root->subv_writers);
fail: fail:
kfree(root->free_ino_ctl); kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned); kfree(root->free_ino_pinned);
@ -3459,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root)
root->orphan_block_rsv = NULL; root->orphan_block_rsv = NULL;
if (root->anon_dev) if (root->anon_dev)
free_anon_bdev(root->anon_dev); free_anon_bdev(root->anon_dev);
if (root->subv_writers)
btrfs_free_subvolume_writers(root->subv_writers);
free_extent_buffer(root->node); free_extent_buffer(root->node);
free_extent_buffer(root->commit_root); free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl); kfree(root->free_ino_ctl);

View File

@ -8938,3 +8938,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
range->len = trimmed; range->len = trimmed;
return ret; return ret;
} }
/*
* btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(),
* they are used to prevent the some tasks writing data into the page cache
* by nocow before the subvolume is snapshoted, but flush the data into
* the disk after the snapshot creation.
*/
void btrfs_end_nocow_write(struct btrfs_root *root)
{
percpu_counter_dec(&root->subv_writers->counter);
/*
* Make sure counter is updated before we wake up
* waiters.
*/
smp_mb();
if (waitqueue_active(&root->subv_writers->wait))
wake_up(&root->subv_writers->wait);
}
int btrfs_start_nocow_write(struct btrfs_root *root)
{
if (unlikely(atomic_read(&root->will_be_snapshoted)))
return 0;
percpu_counter_inc(&root->subv_writers->counter);
/*
* Make sure counter is updated before we check for snapshot creation.
*/
smp_mb();
if (unlikely(atomic_read(&root->will_be_snapshoted))) {
btrfs_end_nocow_write(root);
return 0;
}
return 1;
}

View File

@ -1410,6 +1410,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
u64 num_bytes; u64 num_bytes;
int ret; int ret;
ret = btrfs_start_nocow_write(root);
if (!ret)
return -ENOSPC;
lockstart = round_down(pos, root->sectorsize); lockstart = round_down(pos, root->sectorsize);
lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
@ -1427,11 +1431,13 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
num_bytes = lockend - lockstart + 1; num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
if (ret <= 0) if (ret <= 0) {
ret = 0; ret = 0;
else btrfs_end_nocow_write(root);
} else {
*write_bytes = min_t(size_t, *write_bytes , *write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart); num_bytes - pos + lockstart);
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
@ -1520,6 +1526,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (!only_release_metadata) if (!only_release_metadata)
btrfs_free_reserved_data_space(inode, btrfs_free_reserved_data_space(inode,
reserve_bytes); reserve_bytes);
else
btrfs_end_nocow_write(root);
break; break;
} }
@ -1608,6 +1616,9 @@ again:
} }
release_bytes = 0; release_bytes = 0;
if (only_release_metadata)
btrfs_end_nocow_write(root);
if (only_release_metadata && copied > 0) { if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize); u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart + u64 lockend = lockstart +
@ -1634,10 +1645,12 @@ again:
kfree(pages); kfree(pages);
if (release_bytes) { if (release_bytes) {
if (only_release_metadata) if (only_release_metadata) {
btrfs_end_nocow_write(root);
btrfs_delalloc_release_metadata(inode, release_bytes); btrfs_delalloc_release_metadata(inode, release_bytes);
else } else {
btrfs_delalloc_release_space(inode, release_bytes); btrfs_delalloc_release_space(inode, release_bytes);
}
} }
return num_written ? num_written : ret; return num_written ? num_written : ret;

View File

@ -611,6 +611,23 @@ fail:
return ret; return ret;
} }
static void btrfs_wait_nocow_write(struct btrfs_root *root)
{
s64 writers;
DEFINE_WAIT(wait);
do {
prepare_to_wait(&root->subv_writers->wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&root->subv_writers->counter);
if (writers)
schedule();
finish_wait(&root->subv_writers->wait, &wait);
} while (writers);
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir, static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, char *name, int namelen, struct dentry *dentry, char *name, int namelen,
u64 *async_transid, bool readonly, u64 *async_transid, bool readonly,
@ -624,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!root->ref_cows) if (!root->ref_cows)
return -EINVAL; return -EINVAL;
atomic_inc(&root->will_be_snapshoted);
smp_mb__after_atomic_inc();
btrfs_wait_nocow_write(root);
ret = btrfs_start_delalloc_inodes(root, 0); ret = btrfs_start_delalloc_inodes(root, 0);
if (ret) if (ret)
return ret; goto out;
btrfs_wait_ordered_extents(root, -1); btrfs_wait_ordered_extents(root, -1);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot) if (!pending_snapshot) {
return -ENOMEM; ret = -ENOMEM;
goto out;
}
btrfs_init_block_rsv(&pending_snapshot->block_rsv, btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP); BTRFS_BLOCK_RSV_TEMP);
@ -649,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->qgroup_reserved, &pending_snapshot->qgroup_reserved,
false); false);
if (ret) if (ret)
goto out; goto free;
pending_snapshot->dentry = dentry; pending_snapshot->dentry = dentry;
pending_snapshot->root = root; pending_snapshot->root = root;
@ -700,8 +723,10 @@ fail:
btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv, &pending_snapshot->block_rsv,
pending_snapshot->qgroup_reserved); pending_snapshot->qgroup_reserved);
out: free:
kfree(pending_snapshot); kfree(pending_snapshot);
out:
atomic_dec(&root->will_be_snapshoted);
return ret; return ret;
} }