Merge tag 'for-6.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM thinp discard performance regression introduced during this
   merge window, where DM core was splitting large discards every 128K
   (max_sectors_kb) rather than every 64M (discard_max_bytes); a worked
   example of the impact follows this list.

 - Extend the DM core LOCKFS fix, made during the 6.4 merge window, to
   also fix the race between do_mount and dm's do_suspend (in addition
   to the earlier fix's do_mount race with dm's do_resume).

 - Fix DM thin metadata operations to first check if the thin-pool is in
   "fail_io" mode; otherwise a use-after-free can occur.

 - Fix DM thinp's call to __blkdev_issue_discard to use GFP_NOIO rather
   than GFP_NOWAIT (__blkdev_issue_discard cannot handle NULL return
   from bio_alloc).
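
To make the scale of that regression concrete, here is a small standalone
C program (the 1 GiB discard size is a hypothetical example, not a figure
from the commits) showing how many bios each split boundary produces:

#include <stdio.h>

int main(void)
{
	unsigned long long discard   = 1ULL << 30;   /* hypothetical 1 GiB discard */
	unsigned long long bad_step  = 128ULL << 10; /* 128K: max_sectors_kb boundary */
	unsigned long long good_step = 64ULL << 20;  /* 64M: discard_max_bytes boundary */

	/* the same discard is split into 8192 bios instead of 16 */
	printf("splits at 128K: %llu\n", discard / bad_step);
	printf("splits at 64M:  %llu\n", discard / good_step);
	return 0;
}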

* tag 'for-6.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: use op specific max_sectors when splitting abnormal io
  dm thin: fix issue_discard to pass GFP_NOIO to __blkdev_issue_discard
  dm thin metadata: check fail_io before using data_sm
  dm: don't lock fs when the map is NULL during suspend or resume
commit 0e306952d7 (Linus Torvalds, 2023-06-15 20:19:21 -07:00)
4 changed files with 34 additions and 23 deletions

drivers/md/dm-ioctl.c

@@ -1168,13 +1168,10 @@ static int do_resume(struct dm_ioctl *param)
 	/* Do we need to load a new map ? */
 	if (new_map) {
 		sector_t old_size, new_size;
-		int srcu_idx;
 
 		/* Suspend if it isn't already suspended */
-		old_map = dm_get_live_table(md, &srcu_idx);
-		if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map)
+		if (param->flags & DM_SKIP_LOCKFS_FLAG)
 			suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
-		dm_put_live_table(md, srcu_idx);
 		if (param->flags & DM_NOFLUSH_FLAG)
 			suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
 		if (!dm_suspended_md(md))

drivers/md/dm-thin-metadata.c

@@ -1756,13 +1756,15 @@ int dm_thin_remove_range(struct dm_thin_device *td,
 
 int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
 {
-	int r;
+	int r = -EINVAL;
 	uint32_t ref_count;
 
 	down_read(&pmd->root_lock);
-	r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
-	if (!r)
-		*result = (ref_count > 1);
+	if (!pmd->fail_io) {
+		r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
+		if (!r)
+			*result = (ref_count > 1);
+	}
 	up_read(&pmd->root_lock);
 
 	return r;
@@ -1770,10 +1772,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re
 
 int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
 {
-	int r = 0;
+	int r = -EINVAL;
 
 	pmd_write_lock(pmd);
-	r = dm_sm_inc_blocks(pmd->data_sm, b, e);
+	if (!pmd->fail_io)
+		r = dm_sm_inc_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
@@ -1781,10 +1784,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
 
 int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
 {
-	int r = 0;
+	int r = -EINVAL;
 
 	pmd_write_lock(pmd);
-	r = dm_sm_dec_blocks(pmd->data_sm, b, e);
+	if (!pmd->fail_io)
+		r = dm_sm_dec_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
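
The shape of that fix, modeled as a standalone userspace C program
(struct pool_metadata, block_ref_count and the pthread rwlock are invented
stand-ins for the kernel types, a sketch rather than the real code): the
fail_io flag is tested under the same lock that protects the space map, and
r defaults to -EINVAL so a failed pool returns an error instead of
dereferencing torn-down metadata:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool_metadata {
	pthread_rwlock_t root_lock;
	bool fail_io;     /* set when the pool aborts and tears down its space maps */
	int data_sm[16];  /* stand-in for the data space map */
};

/* mirrors dm_pool_block_is_shared(): only touch data_sm if the pool
 * has not failed; otherwise report -EINVAL */
static int block_ref_count(struct pool_metadata *pmd, int b, int *out)
{
	int r = -EINVAL;

	pthread_rwlock_rdlock(&pmd->root_lock);
	if (!pmd->fail_io) {
		*out = pmd->data_sm[b];
		r = 0;
	}
	pthread_rwlock_unlock(&pmd->root_lock);
	return r;
}

int main(void)
{
	struct pool_metadata pmd = { .fail_io = false };
	int count;

	pthread_rwlock_init(&pmd.root_lock, NULL);
	pmd.data_sm[3] = 2;

	if (!block_ref_count(&pmd, 3, &count))
		printf("ref_count = %d\n", count);        /* prints 2 */

	pmd.fail_io = true;  /* pool has failed: lookups must now refuse */
	printf("after fail_io: %d\n", block_ref_count(&pmd, 3, &count));  /* -22 */
	return 0;
}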

drivers/md/dm-thin.c

@@ -401,8 +401,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
 	sector_t s = block_to_sectors(tc->pool, data_b);
 	sector_t len = block_to_sectors(tc->pool, data_e - data_b);
 
-	return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
-				      &op->bio);
+	return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
 }
 
 static void end_discard(struct discard_op *op, int r)
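
Why the GFP flag matters, as a userspace caricature (alloc_may_fail,
alloc_blocking and issue are invented for illustration): an allocation made
with GFP_NOWAIT may return NULL, whereas a sleeping allocation such as
GFP_NOIO backed by a mempool is expected to succeed, and since
__blkdev_issue_discard never NULL-checks the result of bio_alloc, only the
sleeping variant is safe to pass down:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef void *(*alloc_fn)(size_t);

/* models bio_alloc(GFP_NOWAIT): allowed to fail (failure forced here) */
static void *alloc_may_fail(size_t n)
{
	(void)n;
	return NULL;
}

/* models bio_alloc(GFP_NOIO): may sleep, but is expected to succeed */
static void *alloc_blocking(size_t n)
{
	return malloc(n);
}

/* like __blkdev_issue_discard, issue() never checks for NULL */
static void issue(alloc_fn alloc)
{
	char *buf = alloc(64);

	memset(buf, 0, 64);  /* crashes if the allocator returned NULL */
	free(buf);
}

int main(void)
{
	issue(alloc_blocking);      /* fine */
	/* issue(alloc_may_fail);      would dereference NULL: the bug fixed here */
	puts("ok");
	return 0;
}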

drivers/md/dm.c

@@ -1172,7 +1172,8 @@ static inline sector_t max_io_len_target_boundary(struct dm_target *ti,
 }
 
 static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
-			     unsigned int max_granularity)
+			     unsigned int max_granularity,
+			     unsigned int max_sectors)
 {
 	sector_t target_offset = dm_target_offset(ti, sector);
 	sector_t len = max_io_len_target_boundary(ti, target_offset);
@@ -1186,13 +1187,13 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
 	if (!max_granularity)
 		return len;
 	return min_t(sector_t, len,
-		    min(queue_max_sectors(ti->table->md->queue),
+		    min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
 			blk_chunk_sectors_left(target_offset, max_granularity)));
 }
 
 static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
 {
-	return __max_io_len(ti, sector, ti->max_io_len);
+	return __max_io_len(ti, sector, ti->max_io_len, 0);
 }
 
 int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
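
The `max_sectors ? : queue_max_sectors(...)` expression above relies on the
GNU C conditional with an omitted middle operand (an extension supported by
gcc and clang): `a ? : b` evaluates to a when a is non-zero and to b
otherwise, evaluating a only once. A minimal demonstration (pick_limit and
the sector counts are illustrative; 131072 sectors is 64M, 256 sectors is
128K):

#include <stdio.h>

static unsigned int pick_limit(unsigned int op_max, unsigned int queue_max)
{
	/* the op-specific limit wins when set; otherwise fall back to the
	 * queue-wide max_sectors, exactly as in __max_io_len() above */
	return op_max ? : queue_max;
}

int main(void)
{
	printf("%u\n", pick_limit(131072, 256)); /* 131072: discard limit applies */
	printf("%u\n", pick_limit(0, 256));      /* 256: fall back to max_sectors */
	return 0;
}
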
@@ -1581,12 +1582,13 @@ static void __send_empty_flush(struct clone_info *ci)
 
 static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
 					unsigned int num_bios,
-					unsigned int max_granularity)
+					unsigned int max_granularity,
+					unsigned int max_sectors)
 {
 	unsigned int len, bios;
 
 	len = min_t(sector_t, ci->sector_count,
-		    __max_io_len(ti, ci->sector, max_granularity));
+		    __max_io_len(ti, ci->sector, max_granularity, max_sectors));
 
 	atomic_add(num_bios, &ci->io->io_count);
 	bios = __send_duplicate_bios(ci, ti, num_bios, &len);
@@ -1623,23 +1625,27 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
 {
 	unsigned int num_bios = 0;
 	unsigned int max_granularity = 0;
+	unsigned int max_sectors = 0;
 	struct queue_limits *limits = dm_get_queue_limits(ti->table->md);
 
 	switch (bio_op(ci->bio)) {
 	case REQ_OP_DISCARD:
 		num_bios = ti->num_discard_bios;
+		max_sectors = limits->max_discard_sectors;
 		if (ti->max_discard_granularity)
-			max_granularity = limits->max_discard_sectors;
+			max_granularity = max_sectors;
 		break;
 	case REQ_OP_SECURE_ERASE:
 		num_bios = ti->num_secure_erase_bios;
+		max_sectors = limits->max_secure_erase_sectors;
 		if (ti->max_secure_erase_granularity)
-			max_granularity = limits->max_secure_erase_sectors;
+			max_granularity = max_sectors;
 		break;
 	case REQ_OP_WRITE_ZEROES:
 		num_bios = ti->num_write_zeroes_bios;
+		max_sectors = limits->max_write_zeroes_sectors;
 		if (ti->max_write_zeroes_granularity)
-			max_granularity = limits->max_write_zeroes_sectors;
+			max_granularity = max_sectors;
 		break;
 	default:
 		break;
@@ -1654,7 +1660,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
 	if (unlikely(!num_bios))
 		return BLK_STS_NOTSUPP;
 
-	__send_changing_extent_only(ci, ti, num_bios, max_granularity);
+	__send_changing_extent_only(ci, ti, num_bios,
+				    max_granularity, max_sectors);
 
 	return BLK_STS_OK;
 }
@@ -2808,6 +2815,10 @@ retry:
 	}
 
 	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+	if (!map) {
+		/* avoid deadlock with fs/namespace.c:do_mount() */
+		suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
+	}
 
 	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
 	if (r)
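
A compact model of that suspend-side decision (the flag value and the
effective_suspend_flags helper are hypothetical, distilled from the hunk
above): when no table is loaded there is no filesystem to freeze, so the
LOCKFS flag is simply dropped instead of risking the deadlock with
do_mount:

#include <stdio.h>

#define DM_SUSPEND_LOCKFS_FLAG (1u << 0)  /* illustrative value */

struct mapped_device {
	void *map;  /* live table; NULL when none is loaded */
};

/* mirrors the dm_suspend() hunk: no map means nothing to freeze */
static unsigned int effective_suspend_flags(const struct mapped_device *md,
					    unsigned int suspend_flags)
{
	if (!md->map)
		suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
	return suspend_flags;
}

int main(void)
{
	struct mapped_device md = { .map = NULL };
	unsigned int flags = effective_suspend_flags(&md, DM_SUSPEND_LOCKFS_FLAG);

	printf("lockfs kept? %s\n",
	       (flags & DM_SUSPEND_LOCKFS_FLAG) ? "yes" : "no");  /* "no" */
	return 0;
}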