for-6.0-rc4-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmMaPukACgkQxWXV+ddt WDsWKw/+IcpMsb08sjudn4dtFQ3HSA1E+dOYDzXwUJTS7ZpZhLRniLe1XQwHxe4D 7DUQA+e1RKGq4+TiznoLhaG/YCCcrLPZL/1aWhwO0M5Wj6BCIxSUa00BJNpxyBMw kWb9vQltc5w5zJXHeIr7m2ByzT+YIl0v1lf2GQrJVieHhGiKslfkJHLoJt49oJ0L 9ka183VR/OCi/3uxUw6NMAjfv+0OGEsFZX/CF8Vo64IKg0I0Q248H4enZt43aDHA dQDapAyAr4f6RLDs6ULS2GSzKfZIKMLHlvSeg1BSPyUt/NZFVlC0VwVX0NmwP62a 5NECYdimlQOGSlaahNEQpLIiyNYboi3Mq7m63BofWduDQanpnM1FByln9JVEizlm VuUs3+O0CMp81HecSk3VbSe3ukO2fqAdQjM5cdpRx30TYu7WRiYNE3aHchgLmXLP 0zw9JV6ePg04Mstx+/3lo8D/X/7fMAT3NrqYmuImoekFWbdJfsiUtgdXNOglT9dt 6lb1/0jBEbdiXnQ/jT1OreGwSdGZqkEKF4OE26kPRxURyTDESzglNVyhXmshIANC qnNuUFGea5d7LbyozYyfdcsQS7rEqLVKmUWrOb/3O/K1947/DegYodnhRwjCUSS7 iUaetkYUWxHa7U9303KneCUAyLEf1S8NXRPIObL6YIw7D09wato= =WD7B -----END PGP SIGNATURE----- Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: "A few more fixes to zoned mode and one regression fix for chunk limit: - Zoned mode fixes: - fix how wait/wake up is done when finishing zone - fix zone append limit in emulated mode - fix mount on devices with conventional zones - fix regression, user settable data chunk limit got accidentally lowered and causes allocation problems on some profiles (raid0, raid1)" * tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: fix the max chunk size and stripe length calculation btrfs: zoned: fix mounting with conventional zones btrfs: zoned: set pseudo max append zone limit in zone emulation mode btrfs: zoned: fix API misuse of zone finish waiting
This commit is contained in:
commit
9b45094954
|
@ -1088,8 +1088,6 @@ struct btrfs_fs_info {
|
|||
|
||||
spinlock_t zone_active_bgs_lock;
|
||||
struct list_head zone_active_bgs;
|
||||
/* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
|
||||
wait_queue_head_t zone_finish_wait;
|
||||
|
||||
/* Updates are not protected by any lock */
|
||||
struct btrfs_commit_stats commit_stats;
|
||||
|
|
|
@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
|||
init_waitqueue_head(&fs_info->transaction_blocked_wait);
|
||||
init_waitqueue_head(&fs_info->async_submit_wait);
|
||||
init_waitqueue_head(&fs_info->delayed_iputs_wait);
|
||||
init_waitqueue_head(&fs_info->zone_finish_wait);
|
||||
|
||||
/* Usable values until the real ones are cached from the superblock */
|
||||
fs_info->nodesize = 4096;
|
||||
|
|
|
@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
|
|||
done_offset = end;
|
||||
|
||||
if (done_offset == start) {
|
||||
struct btrfs_fs_info *info = inode->root->fs_info;
|
||||
|
||||
wait_var_event(&info->zone_finish_wait,
|
||||
!test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
|
||||
wait_on_bit_io(&inode->root->fs_info->flags,
|
||||
BTRFS_FS_NEED_ZONE_FINISH,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
|
|||
ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
|
||||
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
return SZ_1G;
|
||||
return BTRFS_MAX_DATA_CHUNK_SIZE;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
return SZ_32M;
|
||||
|
||||
|
|
|
@ -5267,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
|
|||
ctl->stripe_size);
|
||||
}
|
||||
|
||||
/* Stripe size should not go beyond 1G. */
|
||||
ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
|
||||
|
||||
/* Align to BTRFS_STRIPE_LEN */
|
||||
ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
|
||||
ctl->chunk_size = ctl->stripe_size * data_stripes;
|
||||
|
|
|
@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
|
|||
* since btrfs adds the pages one by one to a bio, and btrfs cannot
|
||||
* increase the metadata reservation even if it increases the number of
|
||||
* extents, it is safe to stick with the limit.
|
||||
*
|
||||
* With zoned emulation, we can have a non-zoned device in zoned
|
||||
* mode. In this case, we don't have a valid max zone append size. So,
|
||||
* use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
|
||||
*/
|
||||
zone_info->max_zone_append_size =
|
||||
min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
|
||||
(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
|
||||
if (bdev_is_zoned(bdev)) {
|
||||
zone_info->max_zone_append_size = min_t(u64,
|
||||
(u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
|
||||
(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
|
||||
} else {
|
||||
zone_info->max_zone_append_size =
|
||||
(u64)bdev_max_segments(bdev) << PAGE_SHIFT;
|
||||
}
|
||||
if (!IS_ALIGNED(nr_sectors, zone_sectors))
|
||||
zone_info->nr_zones++;
|
||||
|
||||
|
@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
|
|||
* offset.
|
||||
*/
|
||||
static int calculate_alloc_pointer(struct btrfs_block_group *cache,
|
||||
u64 *offset_ret)
|
||||
u64 *offset_ret, bool new)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
struct btrfs_root *root;
|
||||
|
@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
|
|||
int ret;
|
||||
u64 length;
|
||||
|
||||
/*
|
||||
* Avoid tree lookups for a new block group, there's no use for it.
|
||||
* It must always be 0.
|
||||
*
|
||||
* Also, we have a lock chain of extent buffer lock -> chunk mutex.
|
||||
* For a new block group, this function is called from
|
||||
* btrfs_make_block_group() which is already taking the chunk mutex.
|
||||
* Thus, we cannot call calculate_alloc_pointer() which takes extent
|
||||
* buffer locks to avoid deadlock.
|
||||
*/
|
||||
if (new) {
|
||||
*offset_ret = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||
else
|
||||
num_conventional++;
|
||||
|
||||
/*
|
||||
* Consider a zone as active if we can allow any number of
|
||||
* active zones.
|
||||
*/
|
||||
if (!device->zone_info->max_active_zones)
|
||||
__set_bit(i, active);
|
||||
|
||||
if (!is_sequential) {
|
||||
alloc_offsets[i] = WP_CONVENTIONAL;
|
||||
continue;
|
||||
|
@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||
__set_bit(i, active);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider a zone as active if we can allow any number of
|
||||
* active zones.
|
||||
*/
|
||||
if (!device->zone_info->max_active_zones)
|
||||
__set_bit(i, active);
|
||||
}
|
||||
|
||||
if (num_sequential > 0)
|
||||
cache->seq_zone = true;
|
||||
|
||||
if (num_conventional > 0) {
|
||||
/*
|
||||
* Avoid calling calculate_alloc_pointer() for new BG. It
|
||||
* is no use for new BG. It must be always 0.
|
||||
*
|
||||
* Also, we have a lock chain of extent buffer lock ->
|
||||
* chunk mutex. For new BG, this function is called from
|
||||
* btrfs_make_block_group() which is already taking the
|
||||
* chunk mutex. Thus, we cannot call
|
||||
* calculate_alloc_pointer() which takes extent buffer
|
||||
* locks to avoid deadlock.
|
||||
*/
|
||||
|
||||
/* Zone capacity is always zone size in emulation */
|
||||
cache->zone_capacity = cache->length;
|
||||
if (new) {
|
||||
cache->alloc_offset = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = calculate_alloc_pointer(cache, &last_alloc);
|
||||
if (ret || map->num_stripes == num_conventional) {
|
||||
if (!ret)
|
||||
cache->alloc_offset = last_alloc;
|
||||
else
|
||||
btrfs_err(fs_info,
|
||||
ret = calculate_alloc_pointer(cache, &last_alloc, new);
|
||||
if (ret) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: failed to determine allocation offset of bg %llu",
|
||||
cache->start);
|
||||
cache->start);
|
||||
goto out;
|
||||
} else if (map->num_stripes == num_conventional) {
|
||||
cache->alloc_offset = last_alloc;
|
||||
cache->zone_is_active = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (cache->zone_is_active) {
|
||||
btrfs_get_block_group(cache);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
if (cache->alloc_offset > fs_info->zone_size) {
|
||||
btrfs_err(fs_info,
|
||||
|
@ -1526,10 +1528,16 @@ out:
|
|||
ret = -EIO;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
cache->meta_write_pointer = cache->alloc_offset + cache->start;
|
||||
|
||||
if (ret) {
|
||||
if (cache->zone_is_active) {
|
||||
btrfs_get_block_group(cache);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_add_tail(&cache->active_bg_list,
|
||||
&fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
} else {
|
||||
kfree(cache->physical_map);
|
||||
cache->physical_map = NULL;
|
||||
}
|
||||
|
@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
|||
/* For active_bg_list */
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
|
||||
wake_up_all(&fs_info->zone_finish_wait);
|
||||
clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue