for-5.6-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl44NzMACgkQxWXV+ddt
WDvSMA/+MYGBB/+HIET55a70hoLZR1n21joLPYat+RZnY//vBpVQtHewIHNoqBi3
EWY7rB+OWomIMiHRw4gS4nKWpds3Ou8minUnAVmbP86irfu2e1mip9rQWT6EO7ne
KWd52hb41M4ZG2Oq/2XZdMu49IGUIgUBShioJQAN7VimTI8XSX1mQn0N9pvkRk3s
IX/77kmf5jolO3/hZOJDCN6+LsI1inN6TkEH8ODKC+0ounGN+TcQQydJlfjZ+4n/
BH5G9mpm5FmFQWKp14vfyc2jknwoO9ryd7Mez5Vf70xuFCMQw+Z0ZKyLDeMLGhur
dCV3j57/+XUwsSflT/Q/cmIQZyXIdmShOxcHi9QMnax5lf6XrMgkvEjjzfGGzlzU
ey8f8hTkqkH7KM89G8pvla+DN1It4xzs9kLYczS49pWT5qn/15l7FcRMdwOR37mU
QFcDTfXEhQ5wPbpaNYDWGVycFugyyxgxBgpnSbOpgmvVS/i3qJoChUXGJrM3HMyx
Xsej/oLUnYsBEBe20mEaVp/j288NnQdMo58C+BRGtUEMC4QZM/tg3HUmGJCXqGI1
PXJx8qPPs1ZR4U4ciuWwDAim27LlD04NMGlf/r3ABFaPLMAiPKaVR93Ny0nIEkBA
iPHJD5xVKKmhJnRAWVxz3ZyyRGbjG9IB6syawYsSWMu1qH8z150=
=BuZc
-----END PGP SIGNATURE-----

Merge tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull more btrfs updates from David Sterba:
 "Fixes that arrived after the merge window freeze, mostly stable
  material.

   - fix race in tree-mod-log element tracking

   - fix bio flushing inside extent writepages

   - fix assertion when in-memory tracking of discarded extents finds
     an empty tree (eg. after adding a new device)

   - update logic of temporary read-only block groups to take into
     account overcommit

   - fix some fixup worker corner cases:
      - page could not go through proper COW cycle and the dirty status
        is lost due to page migration
      - deadlock if delayed allocation is performed under page lock

   - fix send emitting invalid clones within the same file

   - fix statfs reporting 0 free space when global block reserve size
     is larger than remaining free space but there is still space for
     new chunks"

* tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: do not zero f_bavail if we have available space
  Btrfs: send, fix emission of invalid clone operations within the same file
  btrfs: do not do delalloc reservation under page lock
  btrfs: drop the -EBUSY case in __extent_writepage_io
  Btrfs: keep pages dirty when using btrfs_writepage_fixup_worker
  btrfs: take overcommit into account in inc_block_group_ro
  btrfs: fix force usage in inc_block_group_ro
  btrfs: Correctly handle empty trees in find_first_clear_extent_bit
  btrfs: flush write bio if we loop in extent_write_cache_pages
  Btrfs: fix race between adding and putting tree mod seq elements and nodes
This commit is contained in:
commit ad80142836
fs/btrfs/block-group.c

@@ -1191,7 +1191,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
-       u64 sinfo_used;
        int ret = -ENOSPC;
 
        spin_lock(&sinfo->lock);
@@ -1205,19 +1204,38 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 
        num_bytes = cache->length - cache->reserved - cache->pinned -
                    cache->bytes_super - cache->used;
-       sinfo_used = btrfs_space_info_used(sinfo, true);
 
        /*
-        * sinfo_used + num_bytes should always <= sinfo->total_bytes.
-        *
-        * Here we make sure if we mark this bg RO, we still have enough
-        * free space as buffer.
+        * Data never overcommits, even in mixed mode, so do just the straight
+        * check of left over space in how much we have allocated.
         */
-       if (sinfo_used + num_bytes <= sinfo->total_bytes) {
+       if (force) {
+               ret = 0;
+       } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
+               u64 sinfo_used = btrfs_space_info_used(sinfo, true);
+
+               /*
+                * Here we make sure if we mark this bg RO, we still have enough
+                * free space as buffer.
+                */
+               if (sinfo_used + num_bytes <= sinfo->total_bytes)
+                       ret = 0;
+       } else {
+               /*
+                * We overcommit metadata, so we need to do the
+                * btrfs_can_overcommit check here, and we need to pass in
+                * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
+                * leeway to allow us to mark this block group as read only.
+                */
+               if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
+                                        BTRFS_RESERVE_NO_FLUSH))
+                       ret = 0;
+       }
+
+       if (!ret) {
                sinfo->bytes_readonly += num_bytes;
                cache->ro++;
                list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
-               ret = 0;
        }
 out:
        spin_unlock(&cache->lock);
@@ -1225,9 +1243,6 @@ out:
        if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
                btrfs_info(cache->fs_info,
                        "unable to make block group %llu ro", cache->start);
-               btrfs_info(cache->fs_info,
-                       "sinfo_used=%llu bg_num_bytes=%llu",
-                       sinfo_used, num_bytes);
                btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
        }
        return ret;
@@ -2225,7 +2240,7 @@ again:
                }
        }
 
-       ret = inc_block_group_ro(cache, !do_chunk_alloc);
+       ret = inc_block_group_ro(cache, 0);
        if (!do_chunk_alloc)
                goto unlock_out;
        if (!ret)
fs/btrfs/ctree.c

@@ -326,12 +326,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
                           struct seq_list *elem)
 {
        write_lock(&fs_info->tree_mod_log_lock);
-       spin_lock(&fs_info->tree_mod_seq_lock);
        if (!elem->seq) {
                elem->seq = btrfs_inc_tree_mod_seq(fs_info);
                list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
        }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
        write_unlock(&fs_info->tree_mod_log_lock);
 
        return elem->seq;
@@ -351,7 +349,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
        if (!seq_putting)
                return;
 
-       spin_lock(&fs_info->tree_mod_seq_lock);
+       write_lock(&fs_info->tree_mod_log_lock);
        list_del(&elem->list);
        elem->seq = 0;
 
@@ -362,19 +360,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
                                 * blocker with lower sequence number exists, we
                                 * cannot remove anything from the log
                                 */
-                               spin_unlock(&fs_info->tree_mod_seq_lock);
+                               write_unlock(&fs_info->tree_mod_log_lock);
                                return;
                        }
                        min_seq = cur_elem->seq;
                }
        }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
 
        /*
         * anything that's lower than the lowest existing (read: blocked)
         * sequence number can be removed from the tree.
         */
-       write_lock(&fs_info->tree_mod_log_lock);
        tm_root = &fs_info->tree_mod_log;
        for (node = rb_first(tm_root); node; node = next) {
                next = rb_next(node);
fs/btrfs/ctree.h

@@ -714,14 +714,12 @@ struct btrfs_fs_info {
        atomic_t nr_delayed_iputs;
        wait_queue_head_t delayed_iputs_wait;
 
-       /* this protects tree_mod_seq_list */
-       spinlock_t tree_mod_seq_lock;
        atomic64_t tree_mod_seq;
-       struct list_head tree_mod_seq_list;
 
-       /* this protects tree_mod_log */
+       /* this protects tree_mod_log and tree_mod_seq_list */
        rwlock_t tree_mod_log_lock;
        struct rb_root tree_mod_log;
+       struct list_head tree_mod_seq_list;
 
        atomic_t async_delalloc_pages;
 
fs/btrfs/delayed-ref.c

@@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
        if (head->is_data)
                return;
 
-       spin_lock(&fs_info->tree_mod_seq_lock);
+       read_lock(&fs_info->tree_mod_log_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;
 
@@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                                        struct seq_list, list);
                seq = elem->seq;
        }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
+       read_unlock(&fs_info->tree_mod_log_lock);
 
 again:
        for (node = rb_first_cached(&head->ref_tree); node;
@@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
        struct seq_list *elem;
        int ret = 0;
 
-       spin_lock(&fs_info->tree_mod_seq_lock);
+       read_lock(&fs_info->tree_mod_log_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
@@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
                }
        }
 
-       spin_unlock(&fs_info->tree_mod_seq_lock);
+       read_unlock(&fs_info->tree_mod_log_lock);
        return ret;
 }
 
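The race fix visible in this and the previous hunks retires the tree_mod_seq_lock spinlock: tree_mod_seq_list is now covered by the same rwlock that protects the tree-mod log, so element removal and log-node cleanup in btrfs_put_tree_mod_seq() happen in one critical section, while the list peeks above become shared readers. A kernel-style sketch of the resulting pattern, assuming module-local stand-ins for the fs_info fields:

#include <linux/list.h>
#include <linux/spinlock.h>     /* DEFINE_RWLOCK() */

static DEFINE_RWLOCK(mod_log_lock);     /* role of tree_mod_log_lock */
static LIST_HEAD(mod_seq_list);         /* role of tree_mod_seq_list */

/* Writer side: adding or removing a blocker element is exclusive. */
static void mod_seq_del(struct list_head *elem)
{
        write_lock(&mod_log_lock);
        list_del(elem);
        /* ... prune tree_mod_log nodes under the same critical section ... */
        write_unlock(&mod_log_lock);
}

/* Reader side: checks like btrfs_check_delayed_seq() may run concurrently. */
static bool mod_seq_blocked(void)
{
        bool blocked;

        read_lock(&mod_log_lock);
        blocked = !list_empty(&mod_seq_list);
        read_unlock(&mod_log_lock);
        return blocked;
}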
fs/btrfs/disk-io.c

@@ -2697,7 +2697,6 @@ int __cold open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
-       spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
        spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->unused_bgs_lock);
fs/btrfs/extent_io.c

@@ -1593,21 +1593,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
        /* Find first extent with bits cleared */
        while (1) {
                node = __etree_search(tree, start, &next, &prev, NULL, NULL);
-               if (!node) {
+               if (!node && !next && !prev) {
+                       /*
+                        * Tree is completely empty, send full range and let
+                        * caller deal with it
+                        */
+                       *start_ret = 0;
+                       *end_ret = -1;
+                       goto out;
+               } else if (!node && !next) {
+                       /*
+                        * We are past the last allocated chunk, set start at
+                        * the end of the last extent.
+                        */
+                       state = rb_entry(prev, struct extent_state, rb_node);
+                       *start_ret = state->end + 1;
+                       *end_ret = -1;
+                       goto out;
+               } else if (!node) {
                        node = next;
-                       if (!node) {
-                               /*
-                                * We are past the last allocated chunk,
-                                * set start at the end of the last extent. The
-                                * device alloc tree should never be empty so
-                                * prev is always set.
-                                */
-                               ASSERT(prev);
-                               state = rb_entry(prev, struct extent_state, rb_node);
-                               *start_ret = state->end + 1;
-                               *end_ret = -1;
-                               goto out;
-                       }
                }
                /*
                 * At this point 'node' either contains 'start' or start is
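The new case analysis after __etree_search() is the heart of this fix: previously a completely empty tree fell into the !node branch with prev == NULL and tripped ASSERT(prev). A userspace sketch of just the branch logic, with a plain struct standing in for struct extent_state and the rb-tree lookups reduced to nullable pointers (all names here are illustrative):

#include <stddef.h>
#include <stdint.h>

struct ext_model { uint64_t start, end; };      /* stand-in for extent_state */

/* Returns 1 when the result is final, 0 when the regular search continues. */
static int classify(const struct ext_model *node, const struct ext_model *next,
                    const struct ext_model *prev,
                    uint64_t *start_ret, uint64_t *end_ret)
{
        if (!node && !next && !prev) {
                /* Completely empty tree: the whole range is clear. */
                *start_ret = 0;
                *end_ret = (uint64_t)-1;
                return 1;
        }
        if (!node && !next) {
                /* Past the last extent: everything after it is clear. */
                *start_ret = prev->end + 1;
                *end_ret = (uint64_t)-1;
                return 1;
        }
        /* Otherwise continue with node (or next when node is NULL). */
        return 0;
}

The selftest added at the bottom of this merge exercises exactly the first branch.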
@@ -3438,11 +3442,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
        ret = btrfs_writepage_cow_fixup(page, start, page_end);
        if (ret) {
                /* Fixup worker will requeue */
-               if (ret == -EBUSY)
-                       wbc->pages_skipped++;
-               else
-                       redirty_page_for_writepage(wbc, page);
+               redirty_page_for_writepage(wbc, page);
 
                update_nr_written(wbc, nr_written);
                unlock_page(page);
                return 1;
@@ -4166,7 +4166,16 @@ retry:
                 */
                scanned = 1;
                index = 0;
-               goto retry;
+
+               /*
+                * If we're looping we could run into a page that is locked by a
+                * writer and that writer could be waiting on writeback for a
+                * page in our current bio, and thus deadlock, so flush the
+                * write bio here.
+                */
+               ret = flush_write_bio(epd);
+               if (!ret)
+                       goto retry;
        }
 
        if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
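The deadlock being avoided: our still-unsubmitted bio contains a page already marked writeback; another task holds a later page locked while waiting for that writeback to finish; when we wrap around we would block on its page lock forever. Condensed shape of the new loop tail (the surrounding guard condition is elided; variable names as in the hunk above):

        scanned = 1;
        index = 0;

        /* Submit everything batched so far, so nobody waits on our pages. */
        ret = flush_write_bio(epd);
        if (!ret)
                goto retry;     /* only rescan when submission succeeded */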
fs/btrfs/inode.c (121 changed lines)
@@ -2189,6 +2189,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 /* see btrfs_writepage_start_hook for details on why this is required */
 struct btrfs_writepage_fixup {
        struct page *page;
+       struct inode *inode;
        struct btrfs_work work;
 };
 
@@ -2202,27 +2203,71 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        struct inode *inode;
        u64 page_start;
        u64 page_end;
-       int ret;
+       int ret = 0;
+       bool free_delalloc_space = true;
 
        fixup = container_of(work, struct btrfs_writepage_fixup, work);
        page = fixup->page;
+       inode = fixup->inode;
+       page_start = page_offset(page);
+       page_end = page_offset(page) + PAGE_SIZE - 1;
+
+       /*
+        * This is similar to page_mkwrite, we need to reserve the space before
+        * we take the page lock.
+        */
+       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
+                                          PAGE_SIZE);
 again:
        lock_page(page);
+
+       /*
+        * Before we queued this fixup, we took a reference on the page.
+        * page->mapping may go NULL, but it shouldn't be moved to a different
+        * address space.
+        */
        if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
-               ClearPageChecked(page);
+               /*
+                * Unfortunately this is a little tricky, either
+                *
+                * 1) We got here and our page had already been dealt with and
+                *    we reserved our space, thus ret == 0, so we need to just
+                *    drop our space reservation and bail. This can happen the
+                *    first time we come into the fixup worker, or could happen
+                *    while waiting for the ordered extent.
+                * 2) Our page was already dealt with, but we happened to get an
+                *    ENOSPC above from the btrfs_delalloc_reserve_space. In
+                *    this case we obviously don't have anything to release, but
+                *    because the page was already dealt with we don't want to
+                *    mark the page with an error, so make sure we're resetting
+                *    ret to 0. This is why we have this check _before_ the ret
+                *    check, because we do not want to have a surprise ENOSPC
+                *    when the page was already properly dealt with.
+                */
+               if (!ret) {
+                       btrfs_delalloc_release_extents(BTRFS_I(inode),
+                                                      PAGE_SIZE);
+                       btrfs_delalloc_release_space(inode, data_reserved,
+                                                    page_start, PAGE_SIZE,
+                                                    true);
+               }
+               ret = 0;
                goto out_page;
        }
 
-       inode = page->mapping->host;
-       page_start = page_offset(page);
-       page_end = page_offset(page) + PAGE_SIZE - 1;
+       /*
+        * We can't mess with the page state unless it is locked, so now that
+        * it is locked bail if we failed to make our space reservation.
+        */
+       if (ret)
+               goto out_page;
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
                         &cached_state);
 
        /* already ordered? We're done */
        if (PagePrivate2(page))
-               goto out;
+               goto out_reserved;
 
        ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
                                        PAGE_SIZE);
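The structural point of this hunk: reserving delalloc space can trigger flushing, and flushing may itself need page locks, so the reservation must happen before lock_page(). A stripped-down model of the new ordering; reserve_space(), release_space() and page_still_needs_fixup() are hypothetical stand-ins for the btrfs_delalloc_* calls and the page-state checks:

#include <linux/fs.h>
#include <linux/mm.h>           /* lock_page(), unlock_page(), PAGE_SIZE */

int reserve_space(struct inode *inode, unsigned long bytes);    /* stand-in */
void release_space(struct inode *inode, unsigned long bytes);   /* stand-in */
bool page_still_needs_fixup(struct page *page);                 /* stand-in */

static void fixup_worker_model(struct page *page, struct inode *inode)
{
        /* 1) Reserve first: flushing for space may itself lock pages. */
        int ret = reserve_space(inode, PAGE_SIZE);

        /* 2) Only then take the page lock and inspect the page state. */
        lock_page(page);
        if (!page_still_needs_fixup(page)) {
                if (!ret)
                        release_space(inode, PAGE_SIZE);  /* nothing to do */
        } else if (!ret) {
                /* ... set delalloc bits; the page stays dirty throughout ... */
        }
        unlock_page(page);
        /* on error: mark the mapping and clean the page, as in out_page above */
}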
@@ -2235,39 +2280,49 @@ again:
                goto again;
        }
 
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
-                                       PAGE_SIZE);
-       if (ret) {
-               mapping_set_error(page->mapping, ret);
-               end_extent_writepage(page, ret, page_start, page_end);
-               ClearPageChecked(page);
-               goto out;
-       }
-
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
                                        &cached_state);
-       if (ret) {
-               mapping_set_error(page->mapping, ret);
-               end_extent_writepage(page, ret, page_start, page_end);
-               ClearPageChecked(page);
+       if (ret)
                goto out_reserved;
-       }
 
-       ClearPageChecked(page);
-       set_page_dirty(page);
+       /*
+        * Everything went as planned, we're now the owner of a dirty page with
+        * delayed allocation bits set and space reserved for our COW
+        * destination.
+        *
+        * The page was dirty when we started, nothing should have cleaned it.
+        */
+       BUG_ON(!PageDirty(page));
+       free_delalloc_space = false;
 out_reserved:
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
-       if (ret)
+       if (free_delalloc_space)
                btrfs_delalloc_release_space(inode, data_reserved, page_start,
                                             PAGE_SIZE, true);
-out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
                             &cached_state);
 out_page:
+       if (ret) {
+               /*
+                * We hit ENOSPC or other errors. Update the mapping and page
+                * to reflect the errors and clean the page.
+                */
+               mapping_set_error(page->mapping, ret);
+               end_extent_writepage(page, ret, page_start, page_end);
+               clear_page_dirty_for_io(page);
+               SetPageError(page);
+       }
+       ClearPageChecked(page);
        unlock_page(page);
        put_page(page);
        kfree(fixup);
        extent_changeset_free(data_reserved);
+       /*
+        * As a precaution, do a delayed iput in case it would be the last iput
+        * that could need flushing space. Recursing back to fixup worker would
+        * deadlock.
+        */
+       btrfs_add_delayed_iput(inode);
 }
 
 /*
@@ -2291,6 +2346,13 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
        if (TestClearPagePrivate2(page))
                return 0;
 
+       /*
+        * PageChecked is set below when we create a fixup worker for this page,
+        * don't try to create another one if we're already PageChecked()
+        *
+        * The extent_io writepage code will redirty the page if we send back
+        * EAGAIN.
+        */
        if (PageChecked(page))
                return -EAGAIN;
 
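With the reservation moved into the worker, both outcomes of this function now look the same to the writepage path: whether a fixup worker was just queued or one is already pending (PageChecked), the caller gets -EAGAIN and simply redirties the page, which is why the separate -EBUSY/pages_skipped branch in __extent_writepage_io was dropped earlier in this diff. A minimal model of the unified contract (names illustrative):

#define MODEL_EAGAIN 11

/*
 * Returns 0 when writeback may proceed, -MODEL_EAGAIN when the page was
 * handed to (or already belongs to) the fixup worker.
 */
static int cow_fixup_model(int ordered_present, int already_queued)
{
        if (ordered_present)
                return 0;               /* ordered extent exists: proceed */
        if (already_queued)
                return -MODEL_EAGAIN;   /* PageChecked: worker pending */
        /* ... queue the worker, set PageChecked ... */
        return -MODEL_EAGAIN;
}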
@@ -2298,12 +2360,21 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
        if (!fixup)
                return -EAGAIN;
 
+       /*
+        * We are already holding a reference to this inode from
+        * write_cache_pages. We need to hold it because the space reservation
+        * takes place outside of the page lock, and we can't trust
+        * page->mapping outside of the page lock.
+        */
+       ihold(inode);
        SetPageChecked(page);
        get_page(page);
        btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
        fixup->page = page;
+       fixup->inode = inode;
        btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
-       return -EBUSY;
+
+       return -EAGAIN;
 }
 
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
fs/btrfs/send.c

@@ -1269,7 +1269,8 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
                 * destination of the stream.
                 */
                if (ino == bctx->cur_objectid &&
-                   offset >= bctx->sctx->cur_inode_next_write_offset)
+                   offset + bctx->extent_len >
+                   bctx->sctx->cur_inode_next_write_offset)
                        return 0;
        }
 
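Why the old predicate emitted invalid clones: it only rejected extents that begin at or past the data not yet written to the destination, but an extent that begins before that offset and ends past it is equally unusable as a clone source. A worked example with hypothetical numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t next_write_offset = 8192;      /* written so far at dest */
        uint64_t offset = 4096;                 /* candidate extent start */
        uint64_t extent_len = 8192;             /* candidate extent length */

        /* Old test: reject only extents starting past the written data. */
        int old_skip = offset >= next_write_offset;             /* 0: kept! */
        /* New test: reject any extent whose end crosses the boundary. */
        int new_skip = offset + extent_len > next_write_offset; /* 1: skipped */

        printf("old skips: %d, new skips: %d\n", old_skip, new_skip);
        return 0;
}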
fs/btrfs/space-info.c

@@ -159,9 +159,9 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
        return (global->size << 1);
 }
 
-static int can_overcommit(struct btrfs_fs_info *fs_info,
+int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                          struct btrfs_space_info *space_info, u64 bytes,
                          enum btrfs_reserve_flush_enum flush)
 {
        u64 profile;
        u64 avail;
@@ -226,7 +226,8 @@ again:
 
        /* Check and see if our ticket can be satisified now. */
        if ((used + ticket->bytes <= space_info->total_bytes) ||
-           can_overcommit(fs_info, space_info, ticket->bytes, flush)) {
+           btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
+                                flush)) {
                btrfs_space_info_update_bytes_may_use(fs_info,
                                                      space_info,
                                                      ticket->bytes);
@@ -639,13 +640,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
                return to_reclaim;
 
        to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-       if (can_overcommit(fs_info, space_info, to_reclaim,
-                          BTRFS_RESERVE_FLUSH_ALL))
+       if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
+                                BTRFS_RESERVE_FLUSH_ALL))
                return 0;
 
        used = btrfs_space_info_used(space_info, true);
 
-       if (can_overcommit(fs_info, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
+       if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
+                                BTRFS_RESERVE_FLUSH_ALL))
                expected = div_factor_fine(space_info->total_bytes, 95);
        else
                expected = div_factor_fine(space_info->total_bytes, 90);
@@ -1004,7 +1006,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
         */
        if (!pending_tickets &&
            ((used + orig_bytes <= space_info->total_bytes) ||
-            can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+            btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
                btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                      orig_bytes);
                ret = 0;
fs/btrfs/space-info.h

@@ -127,6 +127,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
                                 enum btrfs_reserve_flush_enum flush);
 void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
                                struct btrfs_space_info *space_info);
+int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+                        struct btrfs_space_info *space_info, u64 bytes,
+                        enum btrfs_reserve_flush_enum flush);
 
 static inline void btrfs_space_info_free_bytes_may_use(
                                struct btrfs_fs_info *fs_info,
fs/btrfs/super.c

@@ -2135,7 +2135,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         */
        thresh = SZ_4M;
 
-       if (!mixed && total_free_meta - thresh < block_rsv->size)
+       /*
+        * We only want to claim there's no available space if we can no longer
+        * allocate chunks for our metadata profile and our global reserve will
+        * not fit in the free metadata space. If we aren't ->full then we
+        * still can allocate chunks and thus are fine using the currently
+        * calculated f_bavail.
+        */
+       if (!mixed && block_rsv->space_info->full &&
+           total_free_meta - thresh < block_rsv->size)
                buf->f_bavail = 0;
 
        buf->f_type = BTRFS_SUPER_MAGIC;
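The statfs fix adds one precondition to an existing test: f_bavail is forced to zero only when no new metadata chunk can be allocated (space_info->full) and the global reserve would not fit in the remaining free metadata space. A tiny model of the combined condition, with the kernel fields flattened into parameters:

#include <stdbool.h>
#include <stdint.h>

static bool zero_f_bavail(bool mixed, bool meta_space_full,
                          uint64_t total_free_meta, uint64_t thresh,
                          uint64_t global_rsv_size)
{
        return !mixed && meta_space_full &&
               total_free_meta - thresh < global_rsv_size;
}

Before the fix the meta_space_full term was missing, so a filesystem that could still allocate new metadata chunks reported 0 free space once the reserve outgrew the currently free metadata area.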
fs/btrfs/tests/btrfs-tests.c

@@ -142,7 +142,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
        spin_lock_init(&fs_info->qgroup_lock);
        spin_lock_init(&fs_info->super_lock);
        spin_lock_init(&fs_info->fs_roots_radix_lock);
-       spin_lock_init(&fs_info->tree_mod_seq_lock);
        mutex_init(&fs_info->qgroup_ioctl_lock);
        mutex_init(&fs_info->qgroup_rescan_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
fs/btrfs/tests/extent-io-tests.c

@@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void)
        int ret = -EINVAL;
 
        test_msg("running find_first_clear_extent_bit test");
 
        extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
 
+       /* Test correct handling of empty tree */
+       find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
+       if (start != 0 || end != -1) {
+               test_err(
+       "error getting a range from completely empty tree: start %llu end %llu",
+                        start, end);
+               goto out;
+       }
+
        /*
         * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
         * 4M-32M