for-4.20-rc1-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlvoGIUACgkQxWXV+ddt
WDta6g//UJSLnVskCUwh8VyMdd47QArQnaLJowOH7wQn4Nqj+2hf04mCq/kv05ed
OneTezzONZc/qW9fiJGS+Dp77ln4JIDA1hWHtb/A4t9pYlksSQllJ3oiDUVsCp3q
2EbzrjuNz3iQO6TjKlaHX473CLCMQMXS2OXOUnCkF2maMJSdr86oi+j1UiSnud1/
C7uMYM3hG8nkfEfjjb1COpkS2MmzYcPruF5RDcbT/WOUfylTsjjX1E7rK/ZEqS9P
SUcp4uoZe9BNoyWMASLaM7oHE82day4X9MwQoCQFRcm0kq4CnRAZ8X4lBl+M70iW
7Olii/wNZ2SRiJf3jac/rpxoBHvEskXTHyiHTEmdHp4n1L1pL9GzGYIePQcX7uV1
Tb6ImdUUKCC//fPqyeB7cEk5yxqahmlFD3qZVs6GnQkzKrPE+ChLx+7PgcJC/XVh
C5ogNmJm+NvFOuTrYk9zSXg85B8gWHescDJrvNKVizIjw3nKmqiC+dXZljhzw+p8
HscK9EXsiS8jW9ClfJljXzIa4SeA/i7fQGe4tCKfIrCQ+OqUxWpFCEoxygchinfF
Rw90fJ0jX083oXsnfFcVdQpQ+SLSKka/aIRMvi58WRgLU3trci5NNN4TFg8TYRKP
xBDF/iF3sqXajc+xsjoqLhLioZL3Pa5VDNuhsFdois9M5JSRekU=
=K14u
-----END PGP SIGNATURE-----

Merge tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Several fixes to recent release (4.19, fixes tagged for stable) and
  other fixes"

* tag 'for-4.20-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix missing delayed iputs on unmount
  Btrfs: fix data corruption due to cloning of eof block
  Btrfs: fix infinite loop on inode eviction after deduplication of eof block
  Btrfs: fix deadlock on tree root leaf when finding free extent
  btrfs: avoid link error with CONFIG_NO_AUTO_INLINE
  btrfs: tree-checker: Fix misleading group system information
  Btrfs: fix missing data checksums after a ranged fsync (msync)
  btrfs: fix pinned underflow after transaction aborted
  Btrfs: fix cur_offset in the error case for nocow
commit 63a42e1a5c
fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
 int __init btrfs_init_cachep(void);
 void __cold btrfs_destroy_cachep(void);
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+                              struct btrfs_root *root, int *new,
+                              struct btrfs_path *path);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                          struct btrfs_root *root, int *was_new);
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,

fs/btrfs/disk-io.c
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
         struct btrfs_root *root = arg;
         struct btrfs_fs_info *fs_info = root->fs_info;
         int again;
-        struct btrfs_trans_handle *trans;
 
-        do {
+        while (1) {
                 again = 0;
 
                 /* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
                  */
                 btrfs_delete_unused_bgs(fs_info);
 sleep:
+                if (kthread_should_park())
+                        kthread_parkme();
+                if (kthread_should_stop())
+                        return 0;
                 if (!again) {
                         set_current_state(TASK_INTERRUPTIBLE);
-                        if (!kthread_should_stop())
-                                schedule();
+                        schedule();
                         __set_current_state(TASK_RUNNING);
                 }
-        } while (!kthread_should_stop());
-
-        /*
-         * Transaction kthread is stopped before us and wakes us up.
-         * However we might have started a new transaction and COWed some
-         * tree blocks when deleting unused block groups for example. So
-         * make sure we commit the transaction we started to have a clean
-         * shutdown when evicting the btree inode - if it has dirty pages
-         * when we do the final iput() on it, eviction will trigger a
-         * writeback for it which will fail with null pointer dereferences
-         * since work queues and other resources were already released and
-         * destroyed by the time the iput/eviction/writeback is made.
-         */
-        trans = btrfs_attach_transaction(root);
-        if (IS_ERR(trans)) {
-                if (PTR_ERR(trans) != -ENOENT)
-                        btrfs_err(fs_info,
-                                  "cleaner transaction attach returned %ld",
-                                  PTR_ERR(trans));
-        } else {
-                int ret;
-
-                ret = btrfs_commit_transaction(trans);
-                if (ret)
-                        btrfs_err(fs_info,
-                                  "cleaner open transaction commit returned %d",
-                                  ret);
-        }
-
-        return 0;
+        }
 }
 
 static int transaction_kthread(void *arg)
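Note: the hunk above converts the cleaner from a thread that must observe kthread_should_stop() on its own into one that can also be parked. Parking quiesces a kthread without freeing its task_struct, so close_ctree() (next hunk) can silence it early while the transaction kthread may still harmlessly try to wake it. A minimal sketch of the same protocol, with a hypothetical worker rather than btrfs code:

#include <linux/kthread.h>
#include <linux/sched.h>

/* Hypothetical worker following the park/stop protocol used above. */
static int demo_worker(void *arg)
{
        while (1) {
                /* ... one round of background work ... */

                if (kthread_should_park())
                        kthread_parkme();       /* blocks here while parked */
                if (kthread_should_stop())
                        return 0;               /* kthread_stop() reaps us */

                set_current_state(TASK_INTERRUPTIBLE);
                schedule();                     /* wait for the next wakeup */
                __set_current_state(TASK_RUNNING);
        }
}

During shutdown the owner first calls kthread_park(task), after which the worker is guaranteed to start no new work but its task_struct stays valid, and only later calls kthread_stop(task).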
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
         int ret;
 
         set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+        /*
+         * We don't want the cleaner to start new transactions, add more delayed
+         * iputs, etc. while we're closing. We can't use kthread_stop() yet
+         * because that frees the task_struct, and the transaction kthread might
+         * still try to wake up the cleaner.
+         */
+        kthread_park(fs_info->cleaner_kthread);
 
         /* wait for the qgroup rescan worker to stop */
         btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
         if (!sb_rdonly(fs_info->sb)) {
                 /*
-                 * If the cleaner thread is stopped and there are
-                 * block groups queued for removal, the deletion will be
-                 * skipped when we quit the cleaner thread.
+                 * The cleaner kthread is stopped, so do one final pass over
+                 * unused block groups.
                  */
                 btrfs_delete_unused_bgs(fs_info);
 
@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
         unpin = pinned_extents;
 again:
         while (1) {
+                /*
+                 * The btrfs_finish_extent_commit() may get the same range as
+                 * ours between find_first_extent_bit and clear_extent_dirty.
+                 * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+                 * the same extent range.
+                 */
+                mutex_lock(&fs_info->unused_bg_unpin_mutex);
                 ret = find_first_extent_bit(unpin, 0, &start, &end,
                                             EXTENT_DIRTY, NULL);
-                if (ret)
+                if (ret) {
+                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                         break;
+                }
 
                 clear_extent_dirty(unpin, start, end);
                 btrfs_error_unpin_extent_range(fs_info, start, end);
+                mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                 cond_resched();
         }
 

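Note: the pinned-underflow fix above serializes the lookup and the clear under unused_bg_unpin_mutex so a range cannot be unpinned twice, and it is careful to drop the lock on the early break. The same per-iteration locking shape, reduced to a self-contained userspace sketch (all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;
static int next_range;  /* toy stand-in for the dirty-extent tree */

/* Stand-in for find_first_extent_bit(): yields three ranges, then none. */
static bool find_next_range(unsigned long *start, unsigned long *end)
{
        if (next_range >= 3)
                return false;
        *start = next_range * 4096UL;
        *end = *start + 4095UL;
        next_range++;
        return true;
}

static void drain_ranges(void)
{
        unsigned long start, end;

        while (1) {
                /* Lock so lookup and clear are atomic with respect to a
                 * concurrent unpinner (double unpin caused the underflow). */
                pthread_mutex_lock(&range_lock);
                if (!find_next_range(&start, &end)) {
                        /* Every exit path must drop the lock first. */
                        pthread_mutex_unlock(&range_lock);
                        break;
                }
                printf("clearing [%lu, %lu]\n", start, end);
                pthread_mutex_unlock(&range_lock);
        }
}

int main(void)
{
        drain_ranges();
        return 0;
}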
fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
          * sure NOFS is set to keep us from deadlocking.
          */
         nofs_flag = memalloc_nofs_save();
-        inode = btrfs_iget(fs_info->sb, &location, root, NULL);
+        inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+        btrfs_release_path(path);
         memalloc_nofs_restore(nofs_flag);
         if (IS_ERR(inode))
                 return inode;
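Note: the memalloc_nofs_save()/memalloc_nofs_restore() pair bracketing the iget is the scoped-NOFS API: every allocation in between behaves as if it were GFP_NOFS, so direct reclaim cannot recurse into the filesystem while we hold its locks. A minimal sketch of the pattern (hypothetical helper, kernel context assumed):

#include <linux/sched/mm.h>
#include <linux/slab.h>

/* Hypothetical helper: allocate while holding fs locks without letting
 * reclaim re-enter the filesystem and deadlock on those locks. */
static void *alloc_under_fs_locks(size_t size)
{
        unsigned int nofs_flag;
        void *p;

        nofs_flag = memalloc_nofs_save();       /* enter NOFS scope */
        p = kmalloc(size, GFP_KERNEL);          /* behaves as GFP_NOFS here */
        memalloc_nofs_restore(nofs_flag);       /* restore on every exit path */

        return p;
}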
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
         path->search_commit_root = 1;
         path->skip_locking = 1;
 
+        /*
+         * We must pass a path with search_commit_root set to btrfs_iget in
+         * order to avoid a deadlock when allocating extents for the tree root.
+         *
+         * When we are COWing an extent buffer from the tree root, when looking
+         * for a free extent, at extent-tree.c:find_free_extent(), we can find
+         * block group without its free space cache loaded. When we find one
+         * we must load its space cache which requires reading its free space
+         * cache's inode item from the root tree. If this inode item is located
+         * in the same leaf that we started COWing before, then we end up in
+         * deadlock on the extent buffer (trying to read lock it when we
+         * previously write locked it).
+         *
+         * It's safe to read the inode item using the commit root because
+         * block groups, once loaded, stay in memory forever (until they are
+         * removed) as well as their space caches once loaded. New block groups
+         * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
+         * we will never try to read their inode item while the fs is mounted.
+         */
         inode = lookup_free_space_inode(fs_info, block_group, path);
         if (IS_ERR(inode)) {
                 btrfs_free_path(path);

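Note: the comment above hinges on searching the commit root instead of the live root. Roughly, the pattern looks like this — a sketch with a hypothetical caller, assuming it is built inside fs/btrfs, using the real btrfs_path flags from the hunk:

#include "ctree.h"      /* btrfs-internal header */

/* Sketch: read-only lookup against the last committed tree. The commit
 * root is immutable, so no tree locks are taken and the lookup cannot
 * collide with a leaf the caller is currently COWing in the live tree. */
static int lookup_from_commit_root(struct btrfs_root *root,
                                   struct btrfs_key *key)
{
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->search_commit_root = 1;   /* search the committed tree */
        path->skip_locking = 1;         /* safe: that tree never changes */

        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
        btrfs_free_path(path);
        return ret;
}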
fs/btrfs/inode.c
@@ -1531,12 +1531,11 @@ out_check:
         }
         btrfs_release_path(path);
 
-        if (cur_offset <= end && cow_start == (u64)-1) {
+        if (cur_offset <= end && cow_start == (u64)-1)
                 cow_start = cur_offset;
-                cur_offset = end;
-        }
 
         if (cow_start != (u64)-1) {
+                cur_offset = end;
                 ret = cow_file_range(inode, locked_page, cow_start, end, end,
                                      page_started, nr_written, 1, NULL);
                 if (ret)
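Note: the point of moving cur_offset = end next to the cow_file_range() call is that the function's error path cleans up from cur_offset to end, while cow_file_range() already cleans its own range on failure; advancing the cursor before the call keeps the two from overlapping. A toy model of that invariant (all names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for cow_file_range(): on failure it has already cleaned up
 * everything it was asked to handle, i.e. [from, to]. */
static int fallible_op(uint64_t from, uint64_t to)
{
        printf("op failed, cleaned [%llu, %llu] itself\n",
               (unsigned long long)from, (unsigned long long)to);
        return -1;
}

static void cleanup(uint64_t from, uint64_t to)
{
        if (from < to)
                printf("caller cleans [%llu, %llu]\n",
                       (unsigned long long)from, (unsigned long long)to);
}

int main(void)
{
        uint64_t cow_start = 0, cur_offset = 0, end = 4095;
        int ret;

        cur_offset = end;       /* hand the whole tail to the callee first */
        ret = fallible_op(cow_start, end);
        if (ret)
                cleanup(cur_offset, end);  /* no overlap: nothing cleaned twice */
        return 0;
}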
@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static int btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode,
+                                   struct btrfs_path *in_path)
 {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-        struct btrfs_path *path;
+        struct btrfs_path *path = in_path;
         struct extent_buffer *leaf;
         struct btrfs_inode_item *inode_item;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode,
         if (!ret)
                 filled = true;
 
-        path = btrfs_alloc_path();
-        if (!path)
-                return -ENOMEM;
+        if (!path) {
+                path = btrfs_alloc_path();
+                if (!path)
+                        return -ENOMEM;
+        }
 
         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
         if (ret) {
-                btrfs_free_path(path);
+                if (path != in_path)
+                        btrfs_free_path(path);
                 return ret;
         }
 
@@ -3722,7 +3725,8 @@ cache_acl:
                            btrfs_ino(BTRFS_I(inode)),
                            root->root_key.objectid, ret);
         }
-        btrfs_free_path(path);
+        if (path != in_path)
+                btrfs_free_path(path);
 
         if (!maybe_acls)
                 cache_no_acl(inode);
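Note: the path != in_path checks implement a borrow-or-allocate ownership rule: use the caller's path when one is supplied and leave ownership with the caller, otherwise allocate locally and free only what was allocated here. The rule in isolation, as a self-contained sketch (hypothetical names):

#include <stdlib.h>

struct buf { char data[256]; };

/* Sketch of the pattern introduced for btrfs_read_locked_inode(). */
static int do_work(struct buf *in_buf)
{
        struct buf *b = in_buf;
        int ret = 0;

        if (!b) {
                b = malloc(sizeof(*b));
                if (!b)
                        return -1;      /* -ENOMEM in the kernel version */
        }

        /* ... use b ... */

        if (b != in_buf)        /* free only what we allocated ourselves */
                free(b);
        return ret;
}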
@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
 /* Get an inode object given its location and corresponding root.
  * Returns in *is_new if the inode was read from disk
  */
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-                         struct btrfs_root *root, int *new)
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+                              struct btrfs_root *root, int *new,
+                              struct btrfs_path *path)
 {
         struct inode *inode;
 
@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
         if (inode->i_state & I_NEW) {
                 int ret;
 
-                ret = btrfs_read_locked_inode(inode);
+                ret = btrfs_read_locked_inode(inode, path);
                 if (!ret) {
                         inode_tree_add(inode);
                         unlock_new_inode(inode);
@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
         return inode;
 }
 
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+                         struct btrfs_root *root, int *new)
+{
+        return btrfs_iget_path(s, location, root, new, NULL);
+}
+
 static struct inode *new_simple_dir(struct super_block *s,
                                     struct btrfs_key *key,
                                     struct btrfs_root *root)

fs/btrfs/ioctl.c
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
                         const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
 
                         len = round_down(i_size_read(src), sz) - loff;
+                        if (len == 0)
+                                return 0;
                         olen = len;
                 }
         }
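Note: len can become 0 here because a dedup request that starts exactly at the last, unaligned block has its length clamped to the file size rounded down to the sector size, which is the request offset itself. Per the commit title, proceeding with that zero-length request later made inode eviction loop forever. A worked userspace example of the arithmetic (values hypothetical):

#include <stdio.h>
#include <stdint.h>

/* Kernel-style round_down for a power-of-two alignment. */
#define round_down(x, y) ((x) & ~((__typeof__(x))(y) - 1))

int main(void)
{
        uint64_t i_size = 430712;       /* hypothetical file size */
        uint64_t sz = 4096;             /* sectorsize */
        uint64_t loff = round_down(i_size, sz); /* request starts at eof block */

        /* The clamped length is exactly 0 when the request starts at the
         * last, partial block; the added check returns early instead. */
        uint64_t len = round_down(i_size, sz) - loff;
        printf("len = %llu\n", (unsigned long long)len); /* prints 0 */
        return 0;
}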
@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
                 goto out_unlock;
         if (len == 0)
                 olen = len = src->i_size - off;
-        /* if we extend to eof, continue to block boundary */
-        if (off + len == src->i_size)
+        /*
+         * If we extend to eof, continue to block boundary if and only if the
+         * destination end offset matches the destination file's size, otherwise
+         * we would be corrupting data by placing the eof block into the middle
+         * of a file.
+         */
+        if (off + len == src->i_size) {
+                if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
+                        goto out_unlock;
                 len = ALIGN(src->i_size, bs) - off;
+        }
 
         if (len == 0) {
                 ret = 0;

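Note: a worked example of the case the fix rejects, with the kernel ALIGN/IS_ALIGNED macros re-stated for userspace (all values hypothetical):

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)      (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define IS_ALIGNED(x, a) (((x) & ((uint64_t)(a) - 1)) == 0)

int main(void)
{
        uint64_t bs = 4096;             /* fs block size */
        uint64_t src_isize = 500000;    /* source i_size, not block aligned */
        uint64_t dst_isize = 1048576;   /* destination i_size */
        uint64_t off = 499712;          /* clone offset: last full block boundary */
        uint64_t len = src_isize - off; /* reaches source eof: 288 bytes */
        uint64_t destoff = 8192;        /* destination offset, mid-file */

        /* Old behaviour: silently extend len to the block boundary and copy
         * the partial eof block into the middle of the destination file,
         * corrupting the data that followed it. The fix rejects this case. */
        if (off + len == src_isize) {
                if (!IS_ALIGNED(len, bs) && destoff + len < dst_isize) {
                        puts("rejected: eof block would land mid-file");
                        return 1;
                }
                len = ALIGN(src_isize, bs) - off;
        }
        printf("len = %llu\n", (unsigned long long)len);
        return 0;
}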
fs/btrfs/super.c
@@ -1916,7 +1916,7 @@ restore:
 }
 
 /* Used to sort the devices by max_avail(descending sort) */
-static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
                                        const void *dev_info2)
 {
         if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
  * The helper to calc the free space on the devices that can be used to store
  * file data.
  */
-static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
-                                       u64 *free_bytes)
+static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
+                                              u64 *free_bytes)
 {
         struct btrfs_device_info *devices_info;
         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

fs/btrfs/tree-checker.c
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
             type != (BTRFS_BLOCK_GROUP_METADATA |
                      BTRFS_BLOCK_GROUP_DATA)) {
                 block_group_err(fs_info, leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
                         type, hweight64(type),
                         BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
                         BTRFS_BLOCK_GROUP_SYSTEM,

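Note: the one-character change matters because "0x%llu" prints a hex prefix followed by a decimal number, misreporting any value above 9. A small demonstration:

#include <stdio.h>

int main(void)
{
        unsigned long long type = 26;   /* arbitrary flag combination */

        printf("0x%llu\n", type);       /* prints "0x26", implying 38 */
        printf("0x%llx\n", type);       /* prints "0x1a", the actual value */
        return 0;
}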
fs/btrfs/tree-log.c
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                 logged_end = end;
 
         list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+                /*
+                 * Skip extents outside our logging range. It's important to do
+                 * it for correctness because if we don't ignore them, we may
+                 * log them before their ordered extent completes, and therefore
+                 * we could log them without logging their respective checksums
+                 * (the checksum items are added to the csum tree at the very
+                 * end of btrfs_finish_ordered_io()). Also leave such extents
+                 * outside of our range in the list, since we may have another
+                 * ranged fsync in the near future that needs them. If an extent
+                 * outside our range corresponds to a hole, log it to avoid
+                 * leaving gaps between extents (fsck will complain when we are
+                 * not using the NO_HOLES feature).
+                 */
+                if ((em->start > end || em->start + em->len <= start) &&
+                    em->block_start != EXTENT_MAP_HOLE)
+                        continue;
+
                 list_del_init(&em->list);
                 /*
                  * Just an arbitrary number, this can be really CPU intensive
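Note: the skip condition in the hunk above is the standard disjointness test for the inclusive log range [start, end] against an extent map covering [em->start, em->start + em->len): the extent lies entirely after the range or entirely before it. A self-contained check of the boundary cases:

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>

static bool outside_range(uint64_t em_start, uint64_t em_len,
                          uint64_t start, uint64_t end)
{
        return em_start > end || em_start + em_len <= start;
}

int main(void)
{
        assert(outside_range(8192, 4096, 0, 4095));   /* after the range */
        assert(outside_range(0, 4096, 4096, 8191));   /* ends at range start */
        assert(!outside_range(0, 4097, 4096, 8191));  /* one byte overlaps */
        return 0;
}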