for-6.0-rc4-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmMaPukACgkQxWXV+ddt
 WDsWKw/+IcpMsb08sjudn4dtFQ3HSA1E+dOYDzXwUJTS7ZpZhLRniLe1XQwHxe4D
 7DUQA+e1RKGq4+TiznoLhaG/YCCcrLPZL/1aWhwO0M5Wj6BCIxSUa00BJNpxyBMw
 kWb9vQltc5w5zJXHeIr7m2ByzT+YIl0v1lf2GQrJVieHhGiKslfkJHLoJt49oJ0L
 9ka183VR/OCi/3uxUw6NMAjfv+0OGEsFZX/CF8Vo64IKg0I0Q248H4enZt43aDHA
 dQDapAyAr4f6RLDs6ULS2GSzKfZIKMLHlvSeg1BSPyUt/NZFVlC0VwVX0NmwP62a
 5NECYdimlQOGSlaahNEQpLIiyNYboi3Mq7m63BofWduDQanpnM1FByln9JVEizlm
 VuUs3+O0CMp81HecSk3VbSe3ukO2fqAdQjM5cdpRx30TYu7WRiYNE3aHchgLmXLP
 0zw9JV6ePg04Mstx+/3lo8D/X/7fMAT3NrqYmuImoekFWbdJfsiUtgdXNOglT9dt
 6lb1/0jBEbdiXnQ/jT1OreGwSdGZqkEKF4OE26kPRxURyTDESzglNVyhXmshIANC
 qnNuUFGea5d7LbyozYyfdcsQS7rEqLVKmUWrOb/3O/K1947/DegYodnhRwjCUSS7
 iUaetkYUWxHa7U9303KneCUAyLEf1S8NXRPIObL6YIw7D09wato=
 =WD7B
 -----END PGP SIGNATURE-----

Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes to zoned mode and one regression fix for chunk limit:

    - Zoned mode fixes:
        - fix how wait/wake up is done when finishing zone
        - fix zone append limit in emulated mode
        - fix mount on devices with conventional zones

   - fix regression, user settable data chunk limit got accidentally
     lowered and causes allocation problems on some profiles (raid0,
     raid1)"

* tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix the max chunk size and stripe length calculation
  btrfs: zoned: fix mounting with conventional zones
  btrfs: zoned: set pseudo max append zone limit in zone emulation mode
  btrfs: zoned: fix API misuse of zone finish waiting
This commit is contained in:
Linus Torvalds 2022-09-09 07:54:19 -04:00
commit 9b45094954
6 changed files with 60 additions and 54 deletions

View File

@ -1088,8 +1088,6 @@ struct btrfs_fs_info {
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
/* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
wait_queue_head_t zone_finish_wait;
/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;

View File

@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
init_waitqueue_head(&fs_info->zone_finish_wait);
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;

View File

@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
done_offset = end;
if (done_offset == start) {
struct btrfs_fs_info *info = inode->root->fs_info;
wait_var_event(&info->zone_finish_wait,
!test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
wait_on_bit_io(&inode->root->fs_info->flags,
BTRFS_FS_NEED_ZONE_FINISH,
TASK_UNINTERRUPTIBLE);
continue;
}

View File

@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
if (flags & BTRFS_BLOCK_GROUP_DATA)
return SZ_1G;
return BTRFS_MAX_DATA_CHUNK_SIZE;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return SZ_32M;

View File

@ -5267,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
ctl->stripe_size);
}
/* Stripe size should not go beyond 1G. */
ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
/* Align to BTRFS_STRIPE_LEN */
ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
ctl->chunk_size = ctl->stripe_size * data_stripes;

View File

@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
* since btrfs adds the pages one by one to a bio, and btrfs cannot
* increase the metadata reservation even if it increases the number of
* extents, it is safe to stick with the limit.
*
* With the zoned emulation, we can have non-zoned device on the zoned
* mode. In this case, we don't have a valid max zone append size. So,
* use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
*/
zone_info->max_zone_append_size =
min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
if (bdev_is_zoned(bdev)) {
zone_info->max_zone_append_size = min_t(u64,
(u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
} else {
zone_info->max_zone_append_size =
(u64)bdev_max_segments(bdev) << PAGE_SHIFT;
}
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
* offset.
*/
static int calculate_alloc_pointer(struct btrfs_block_group *cache,
u64 *offset_ret)
u64 *offset_ret, bool new)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_root *root;
@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
int ret;
u64 length;
/*
* Avoid tree lookups for a new block group, there's no use for it.
* It must always be 0.
*
* Also, we have a lock chain of extent buffer lock -> chunk mutex.
* For new a block group, this function is called from
* btrfs_make_block_group() which is already taking the chunk mutex.
* Thus, we cannot call calculate_alloc_pointer() which takes extent
* buffer locks to avoid deadlock.
*/
if (new) {
*offset_ret = 0;
return 0;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
else
num_conventional++;
/*
* Consider a zone as active if we can allow any number of
* active zones.
*/
if (!device->zone_info->max_active_zones)
__set_bit(i, active);
if (!is_sequential) {
alloc_offsets[i] = WP_CONVENTIONAL;
continue;
@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
__set_bit(i, active);
break;
}
/*
* Consider a zone as active if we can allow any number of
* active zones.
*/
if (!device->zone_info->max_active_zones)
__set_bit(i, active);
}
if (num_sequential > 0)
cache->seq_zone = true;
if (num_conventional > 0) {
/*
* Avoid calling calculate_alloc_pointer() for new BG. It
* is no use for new BG. It must be always 0.
*
* Also, we have a lock chain of extent buffer lock ->
* chunk mutex. For new BG, this function is called from
* btrfs_make_block_group() which is already taking the
* chunk mutex. Thus, we cannot call
* calculate_alloc_pointer() which takes extent buffer
* locks to avoid deadlock.
*/
/* Zone capacity is always zone size in emulation */
cache->zone_capacity = cache->length;
if (new) {
cache->alloc_offset = 0;
goto out;
}
ret = calculate_alloc_pointer(cache, &last_alloc);
if (ret || map->num_stripes == num_conventional) {
if (!ret)
cache->alloc_offset = last_alloc;
else
btrfs_err(fs_info,
ret = calculate_alloc_pointer(cache, &last_alloc, new);
if (ret) {
btrfs_err(fs_info,
"zoned: failed to determine allocation offset of bg %llu",
cache->start);
cache->start);
goto out;
} else if (map->num_stripes == num_conventional) {
cache->alloc_offset = last_alloc;
cache->zone_is_active = 1;
goto out;
}
}
@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
if (cache->zone_is_active) {
btrfs_get_block_group(cache);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
}
out:
if (cache->alloc_offset > fs_info->zone_size) {
btrfs_err(fs_info,
@ -1526,10 +1528,16 @@ out:
ret = -EIO;
}
if (!ret)
if (!ret) {
cache->meta_write_pointer = cache->alloc_offset + cache->start;
if (ret) {
if (cache->zone_is_active) {
btrfs_get_block_group(cache);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&cache->active_bg_list,
&fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
}
} else {
kfree(cache->physical_map);
cache->physical_map = NULL;
}
@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* For active_bg_list */
btrfs_put_block_group(block_group);
clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
wake_up_all(&fs_info->zone_finish_wait);
clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
return 0;
}