for-5.17-rc1-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmHwDeMACgkQxWXV+ddt
WDtdMQ//QFqkIB34zW5N3uX1xBFht/G/bCPNdGiK5YerjJZj1f6Rmsytbb6qlWHg
NlB/XEPeQaQVrSfF37svnvATgySPaqePsufrT2XYu3x2w8muPTl460wmzdMt5h47
rGB+ct4JdLBH4KJgqe2Bilrqg+FJmL3XT5k0aU3driy4Gb+bcDGeEyVmTWcnNRIg
DzfUlNwTKhAhZDl8D3B9X2vV8TZDBtrRLquI94eYvooF3LYDL+kExLUW8WDmmAfy
mjnANs8c+EtcVAzN7tW+O1UqdYYJ8Yo4ngk1nVVRdRvA2BDp9ixgWi/m/3jZ3JmJ
jySV1zsZJB3ZGp/hIuEvtGY7jheDtbTnfgtI+vwjVdr208acs+XhfDckuOZBZIUY
7Zk+Qif/narxFAoAvkgkH5QDNSSReKqaHgzohfnzSQqrfO0bh6fw1FnBOm4iXT7C
cXvReD4m36g46SdTsxnvttpXizXIFe4JPOkpRkBzxIQFaMTA4Is43W0lYC24Ppxj
A0UVevh3HPhOYzABynuy0EnknZeylb6P+WpGG6Ge+sVrVquQiwR01n4HeoaJO3qe
re46uUGwO8Q30blYY50HBSJp0bpcciPZRVMJaspcAT9KD0fJ1s/csd2lQyP4ewn6
A0zg6eabc0PD3LwdlHqp//jTNft/BL4RVZ2c3uM+mgXnGeekcoQ=
=EysX
-----END PGP SIGNATURE-----

Merge tag 'for-5.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Several fixes for defragmentation that got broken in 5.16 after
  refactoring and added subpage support. The observed bugs are
  excessive IO or uninterruptible ioctl.

  All stable material"

* tag 'for-5.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: update writeback index when starting defrag
  btrfs: add back missing dirty page rate limiting to defrag
  btrfs: fix deadlock when reserving space during defrag
  btrfs: defrag: properly update range->start for autodefrag
  btrfs: defrag: fix wrong number of defragged sectors
  btrfs: allow defrag to be interruptible
  btrfs: fix too long loop when defragging a 1 byte file
commit 49d766f3a0
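For context on the ioctl these fixes harden, here is a minimal userspace sketch (not part of this pull; the path, the 256K threshold, and the error handling are illustrative) that drives the ranged defrag ioctl through the uapi header linux/btrfs.h:

/* Minimal sketch, not from this pull: driving BTRFS_IOC_DEFRAG_RANGE
 * from userspace. Path and threshold are placeholder values. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(void)
{
	struct btrfs_ioctl_defrag_range_args args;
	int fd = open("/mnt/btrfs/file", O_RDWR);	/* placeholder path */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&args, 0, sizeof(args));
	args.start = 0;			/* defrag from the start of the file */
	args.len = (__u64)-1;		/* ... through the end of the file */
	args.extent_thresh = 256 * 1024; /* skip extents at or above 256K */

	/* With the interruptibility fix below, a fatal signal now ends the
	 * kernel loop with -EAGAIN instead of an uninterruptible ioctl. */
	if (ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &args) < 0)
		perror("BTRFS_IOC_DEFRAG_RANGE");
	close(fd);
	return 0;
}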
@@ -1213,6 +1213,35 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 		if (em->generation < newer_than)
 			goto next;
 
+		/*
+		 * Our start offset might be in the middle of an existing extent
+		 * map, so take that into account.
+		 */
+		range_len = em->len - (cur - em->start);
+		/*
+		 * If this range of the extent map is already flagged for delalloc,
+		 * skip it, because:
+		 *
+		 * 1) We could deadlock later, when trying to reserve space for
+		 *    delalloc, because in case we can't immediately reserve space
+		 *    the flusher can start delalloc and wait for the respective
+		 *    ordered extents to complete. The deadlock would happen
+		 *    because we do the space reservation while holding the range
+		 *    locked, and starting writeback, or finishing an ordered
+		 *    extent, requires locking the range;
+		 *
+		 * 2) If there's delalloc there, it means there's dirty pages for
+		 *    which writeback has not started yet (we clean the delalloc
+		 *    flag when starting writeback and after creating an ordered
+		 *    extent). If we mark pages in an adjacent range for defrag,
+		 *    then we will have a larger contiguous range for delalloc,
+		 *    very likely resulting in a larger extent after writeback is
+		 *    triggered (except in a case of free space fragmentation).
+		 */
+		if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1,
+				   EXTENT_DELALLOC, 0, NULL))
+			goto next;
+
 		/*
 		 * For do_compress case, we want to compress all valid file
 		 * extents, thus no @extent_thresh or mergeable check.
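A worked example of the new range_len computation above, with illustrative numbers (none of these values come from the patch): when the defrag cursor lands in the middle of an extent map, only the remainder of the extent is considered, and it is that remainder that gets the EXTENT_DELALLOC check.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative values only: an extent map at 1 MiB, 512 KiB long,
	 * with the defrag cursor sitting 128 KiB into it. */
	uint64_t em_start = 1048576;	/* em->start */
	uint64_t em_len   = 524288;	/* em->len */
	uint64_t cur      = 1179648;	/* 1 MiB + 128 KiB */

	/* Same arithmetic as the hunk above */
	uint64_t range_len = em_len - (cur - em_start);

	assert(range_len == 393216);	/* 384 KiB of the extent remains */
	return 0;
}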
@@ -1221,7 +1250,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 			goto add;
 
 		/* Skip too large extent */
-		if (em->len >= extent_thresh)
+		if (range_len >= extent_thresh)
 			goto next;
 
 		next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
@@ -1442,9 +1471,11 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
 	list_for_each_entry(entry, &target_list, list) {
 		u32 range_len = entry->len;
 
-		/* Reached the limit */
-		if (max_sectors && max_sectors == *sectors_defragged)
+		/* Reached or beyond the limit */
+		if (max_sectors && *sectors_defragged >= max_sectors) {
+			ret = 1;
 			break;
+		}
 
 		if (max_sectors)
 			range_len = min_t(u32, range_len,
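A standalone toy loop (names and numbers invented here, not kernel code) showing why the new >= test paired with returning 1 is more robust than the old equality test: if the counter ever steps past the cap in one go, equality never fires, while >= stops on the first pass at or beyond the limit.

#include <assert.h>

int main(void)
{
	const unsigned long max_sectors = 16;	/* cap, made-up value */
	unsigned long defragged = 0;
	int passes = 0;

	/* Each pass adds 10 sectors: 0, 10, 20, ... never exactly 16, so
	 * "max_sectors == defragged" would never fire; ">=" does. */
	while (!(max_sectors && defragged >= max_sectors) && passes < 1000) {
		defragged += 10;
		passes++;
	}
	assert(passes == 2);	/* the >= check stops at 20 >= 16 */
	return 0;
}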
@@ -1465,7 +1496,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
 				       extent_thresh, newer_than, do_compress);
 		if (ret < 0)
 			break;
-		*sectors_defragged += range_len;
+		*sectors_defragged += range_len >>
+				      inode->root->fs_info->sectorsize_bits;
 	}
 out:
 	list_for_each_entry_safe(entry, tmp, &target_list, list) {
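The one-line change above fixes the units of the counter: range_len is in bytes while sectors_defragged counts sectors, so the old code inflated the count by a factor of the sector size. A quick arithmetic check, assuming 4 KiB sectors:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t sectorsize_bits = 12;	/* assuming 4 KiB sectors */
	uint32_t range_len = 131072;		/* 128 KiB defragged, in bytes */
	uint32_t sectors_defragged = 0;

	/* Old code: sectors_defragged += range_len; -> 131072 "sectors".
	 * Fixed code: convert bytes to sectors first. */
	sectors_defragged += range_len >> sectorsize_bits;

	assert(sectors_defragged == 32);	/* 128 KiB / 4 KiB = 32 */
	return 0;
}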
@@ -1484,6 +1516,12 @@ out:
  * @newer_than:    minimum transid to defrag
  * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
  *                 will be defragged.
+ *
+ * Return <0 for error.
+ * Return >=0 for the number of sectors defragged, and range->start will be updated
+ * to indicate the file offset where next defrag should be started at.
+ * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without
+ * defragging all the range).
  */
 int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 		      struct btrfs_ioctl_defrag_range_args *range,
@@ -1499,6 +1537,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 	int compress_type = BTRFS_COMPRESS_ZLIB;
 	int ret = 0;
 	u32 extent_thresh = range->extent_thresh;
+	pgoff_t start_index;
 
 	if (isize == 0)
 		return 0;
@@ -1518,12 +1557,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 
 	if (range->start + range->len > range->start) {
 		/* Got a specific range */
-		last_byte = min(isize, range->start + range->len) - 1;
+		last_byte = min(isize, range->start + range->len);
 	} else {
 		/* Defrag until file end */
-		last_byte = isize - 1;
+		last_byte = isize;
 	}
 
+	/* Align the range */
+	cur = round_down(range->start, fs_info->sectorsize);
+	last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+
 	/*
 	 * If we were not given a ra, allocate a readahead context. As
 	 * readahead is just an optimization, defrag will work without it so
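The switch to an exclusive last_byte above is what fixes the 1-byte-file case: with the old inclusive value, isize - 1 == 0 rounded up to 0, and the trailing "- 1" wrapped around to U64_MAX, so the defrag loop ran essentially forever. A self-contained check of that arithmetic (the macros mimic the kernel's round_down()/round_up() for power-of-two sizes):

#include <assert.h>
#include <stdint.h>

#define ROUND_DOWN(x, sz)	((x) & ~((uint64_t)(sz) - 1))
#define ROUND_UP(x, sz)		(((x) + (sz) - 1) & ~((uint64_t)(sz) - 1))

int main(void)
{
	const uint64_t sectorsize = 4096;
	const uint64_t isize = 1;	/* a 1 byte file */

	/* Old code: inclusive last_byte = isize - 1 = 0, which rounds up
	 * to 0; subtracting 1 then wraps to UINT64_MAX. */
	uint64_t old_last_byte = ROUND_UP(isize - 1, sectorsize) - 1;
	assert(old_last_byte == UINT64_MAX);

	/* New code: exclusive last_byte = isize = 1, giving exactly one
	 * aligned sector to scan: [0, 4095]. */
	uint64_t cur = ROUND_DOWN(0, sectorsize);
	uint64_t last_byte = ROUND_UP(isize, sectorsize) - 1;
	assert(cur == 0 && last_byte == 4095);
	return 0;
}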
@@ -1536,16 +1579,26 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 		file_ra_state_init(ra, inode->i_mapping);
 	}
 
-	/* Align the range */
-	cur = round_down(range->start, fs_info->sectorsize);
-	last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+	/*
+	 * Make writeback start from the beginning of the range, so that the
+	 * defrag range can be written sequentially.
+	 */
+	start_index = cur >> PAGE_SHIFT;
+	if (start_index < inode->i_mapping->writeback_index)
+		inode->i_mapping->writeback_index = start_index;
 
 	while (cur < last_byte) {
+		const unsigned long prev_sectors_defragged = sectors_defragged;
 		u64 cluster_end;
 
 		/* The cluster size 256K should always be page aligned */
 		BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
+		if (btrfs_defrag_cancelled(fs_info)) {
+			ret = -EAGAIN;
+			break;
+		}
+
 		/* We want the cluster end at page boundary when possible */
 		cluster_end = (((cur >> PAGE_SHIFT) +
 			       (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
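The cluster_end expression in the context above is easy to misread; with assumed 4 KiB pages it ends each 256K cluster exactly on a page boundary relative to the page containing cur:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	12		/* assuming 4 KiB pages */
#define SZ_256K		(256 * 1024UL)

int main(void)
{
	uint64_t cur = 10240;	/* illustrative: 10 KiB, inside page 2 */

	/* Same expression as in the hunk above: 64 pages (256 KiB) past
	 * the page containing cur, minus one for an inclusive end. */
	uint64_t cluster_end = (((cur >> PAGE_SHIFT) +
				 (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;

	assert(cluster_end == (66UL << PAGE_SHIFT) - 1);	/* 270335 */
	return 0;
}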
@@ -1567,14 +1620,27 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 					 cluster_end + 1 - cur, extent_thresh,
 					 newer_than, do_compress,
 					 &sectors_defragged, max_to_defrag);
+
+		if (sectors_defragged > prev_sectors_defragged)
+			balance_dirty_pages_ratelimited(inode->i_mapping);
+
 		btrfs_inode_unlock(inode, 0);
 		if (ret < 0)
 			break;
 		cur = cluster_end + 1;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
 	}
 
 	if (ra_allocated)
 		kfree(ra);
+	/*
+	 * Update range.start for autodefrag, this will indicate where to start
+	 * in next run.
+	 */
+	range->start = cur;
 	if (sectors_defragged) {
 		/*
 		 * We have defragged some sectors, for compression case they
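The throttle restored above only runs when a cluster actually made progress. A standalone mock of that control flow (the stub functions are invented here and merely stand in for defrag_one_cluster() and balance_dirty_pages_ratelimited()):

#include <stdio.h>

/* Stub invented for illustration: pretend even-numbered clusters
 * dirty some pages and odd-numbered ones are skipped entirely. */
static void defrag_one_cluster_stub(int cluster, unsigned long *sectors)
{
	if (cluster % 2 == 0)
		*sectors += 8;
}

int main(void)
{
	unsigned long sectors_defragged = 0;

	for (int cluster = 0; cluster < 4; cluster++) {
		const unsigned long prev_sectors_defragged = sectors_defragged;

		defrag_one_cluster_stub(cluster, &sectors_defragged);

		/* Mirrors the hunk: throttle only after real progress, so
		 * clusters that dirtied nothing skip the throttle point. */
		if (sectors_defragged > prev_sectors_defragged)
			printf("cluster %d: throttle point\n", cluster);
	}
	return 0;
}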