- Fix DM cache metadata to verify that a cache has blocks before trying
to continue with operations that require them. - Fix bio-based DM core's dm_make_request() to properly impose device limits on individual bios by making use of blk_queue_split(). - Fix long-standing race with how DM thinp notified userspace of thin-pool mode state changes before they were actually made. - Fix the zoned target's bio completion handling; this is a fairly invasive fix at this stage but it is localized to the zoned target. Any zoned target users will benefit from this fix. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJcEUXZAAoJEMUj8QotnQNaBykIANWuydEZ0bleZQvdcUCK256K MYQi16kdettxwAlzojxgpD1gjpAYuQJWRBeSyLxpMPa/jrNh8U5pcgkb8EQkaSd0 0KWxJS8V85a+fKTGpyaK5vVmbZcezY3GADGv5GDC2yeBTZJTcFWhsGQwfP/Il/X3 fKo9qOs2sabdCbR11U3psicsRbMVIkyDfX23hIZWSdVPNI43YKWugFZ1irOhh9gD QNyUJ1cDOGYTwmTKHuJ9IidjuuU6rfhkbAek9TWTkhmWHoshlr3j9fpIOteB8U0M vNu4oLedm+QBV8jOwplyAbDG7hxx8V4RNiNy31g4Er6KJltiMVpAbfOYdBpa3WE= =YHnh -----END PGP SIGNATURE----- Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device mapper fixes from Mike Snitzer: - Fix DM cache metadata to verify that a cache has blocks before trying to continue with operations that require them. - Fix bio-based DM core's dm_make_request() to properly impose device limits on individual bios by making use of blk_queue_split(). - Fix long-standing race with how DM thinp notified userspace of thin-pool mode state changes before they were actually made. - Fix the zoned target's bio completion handling; this is a fairly invasive fix at this stage but it is localized to the zoned target. Any zoned target users will benefit from this fix. 
* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm thin: bump target version dm thin: send event about thin-pool state change _after_ making it dm zoned: Fix target BIO completion handling dm: call blk_queue_split() to impose device limits on bios dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()
This commit is contained in:
commit
67f2a93099
|
@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
|
|||
bool dirty_flag;
|
||||
*result = true;
|
||||
|
||||
if (from_cblock(cmd->cache_blocks) == 0)
|
||||
/* Nothing to do */
|
||||
return 0;
|
||||
|
||||
r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
|
||||
from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
|
||||
if (r) {
|
||||
|
|
|
@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
|
|||
struct dm_thin_new_mapping;
|
||||
|
||||
/*
|
||||
* The pool runs in 4 modes. Ordered in degraded order for comparisons.
|
||||
* The pool runs in various modes. Ordered in degraded order for comparisons.
|
||||
*/
|
||||
enum pool_mode {
|
||||
PM_WRITE, /* metadata may be changed */
|
||||
|
@ -282,9 +282,38 @@ struct pool {
|
|||
mempool_t mapping_pool;
|
||||
};
|
||||
|
||||
static enum pool_mode get_pool_mode(struct pool *pool);
|
||||
static void metadata_operation_failed(struct pool *pool, const char *op, int r);
|
||||
|
||||
static enum pool_mode get_pool_mode(struct pool *pool)
|
||||
{
|
||||
return pool->pf.mode;
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change(struct pool *pool)
|
||||
{
|
||||
const char *descs[] = {
|
||||
"write",
|
||||
"out-of-data-space",
|
||||
"read-only",
|
||||
"read-only",
|
||||
"fail"
|
||||
};
|
||||
const char *extra_desc = NULL;
|
||||
enum pool_mode mode = get_pool_mode(pool);
|
||||
|
||||
if (mode == PM_OUT_OF_DATA_SPACE) {
|
||||
if (!pool->pf.error_if_no_space)
|
||||
extra_desc = " (queue IO)";
|
||||
else
|
||||
extra_desc = " (error IO)";
|
||||
}
|
||||
|
||||
dm_table_event(pool->ti->table);
|
||||
DMINFO("%s: switching pool to %s%s mode",
|
||||
dm_device_name(pool->pool_md),
|
||||
descs[(int)mode], extra_desc ? : "");
|
||||
}
|
||||
|
||||
/*
|
||||
* Target context for a pool.
|
||||
*/
|
||||
|
@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
|
|||
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change_to_oods(struct pool *pool);
|
||||
|
||||
/*
|
||||
* We're holding onto IO to allow userland time to react. After the
|
||||
* timeout either the pool will have been resized (and thus back in
|
||||
|
@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)
|
|||
|
||||
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
|
||||
pool->pf.error_if_no_space = true;
|
||||
notify_of_pool_mode_change_to_oods(pool);
|
||||
notify_of_pool_mode_change(pool);
|
||||
error_retry_list_with_code(pool, BLK_STS_NOSPC);
|
||||
}
|
||||
}
|
||||
|
@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
|
|||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
static enum pool_mode get_pool_mode(struct pool *pool)
|
||||
{
|
||||
return pool->pf.mode;
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
|
||||
{
|
||||
dm_table_event(pool->ti->table);
|
||||
DMINFO("%s: switching pool to %s mode",
|
||||
dm_device_name(pool->pool_md), new_mode);
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change_to_oods(struct pool *pool)
|
||||
{
|
||||
if (!pool->pf.error_if_no_space)
|
||||
notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
|
||||
else
|
||||
notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
|
||||
}
|
||||
|
||||
static bool passdown_enabled(struct pool_c *pt)
|
||||
{
|
||||
return pt->adjusted_pf.discard_passdown;
|
||||
|
@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
|
||||
switch (new_mode) {
|
||||
case PM_FAIL:
|
||||
if (old_mode != new_mode)
|
||||
notify_of_pool_mode_change(pool, "failure");
|
||||
dm_pool_metadata_read_only(pool->pmd);
|
||||
pool->process_bio = process_bio_fail;
|
||||
pool->process_discard = process_bio_fail;
|
||||
|
@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
|
||||
case PM_OUT_OF_METADATA_SPACE:
|
||||
case PM_READ_ONLY:
|
||||
if (!is_read_only_pool_mode(old_mode))
|
||||
notify_of_pool_mode_change(pool, "read-only");
|
||||
dm_pool_metadata_read_only(pool->pmd);
|
||||
pool->process_bio = process_bio_read_only;
|
||||
pool->process_discard = process_bio_success;
|
||||
|
@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
* alarming rate. Adjust your low water mark if you're
|
||||
* frequently seeing this mode.
|
||||
*/
|
||||
if (old_mode != new_mode)
|
||||
notify_of_pool_mode_change_to_oods(pool);
|
||||
pool->out_of_data_space = true;
|
||||
pool->process_bio = process_bio_read_only;
|
||||
pool->process_discard = process_discard_bio;
|
||||
|
@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
break;
|
||||
|
||||
case PM_WRITE:
|
||||
if (old_mode != new_mode)
|
||||
notify_of_pool_mode_change(pool, "write");
|
||||
if (old_mode == PM_OUT_OF_DATA_SPACE)
|
||||
cancel_delayed_work_sync(&pool->no_space_timeout);
|
||||
pool->out_of_data_space = false;
|
||||
|
@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
|||
* doesn't cause an unexpected mode transition on resume.
|
||||
*/
|
||||
pt->adjusted_pf.mode = new_mode;
|
||||
|
||||
if (old_mode != new_mode)
|
||||
notify_of_pool_mode_change(pool);
|
||||
}
|
||||
|
||||
static void abort_transaction(struct pool *pool)
|
||||
|
@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
|
|||
.name = "thin-pool",
|
||||
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
|
||||
DM_TARGET_IMMUTABLE,
|
||||
.version = {1, 20, 0},
|
||||
.version = {1, 21, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = pool_ctr,
|
||||
.dtr = pool_dtr,
|
||||
|
@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|||
|
||||
static struct target_type thin_target = {
|
||||
.name = "thin",
|
||||
.version = {1, 20, 0},
|
||||
.version = {1, 21, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = thin_ctr,
|
||||
.dtr = thin_dtr,
|
||||
|
|
|
@ -20,7 +20,6 @@ struct dmz_bioctx {
|
|||
struct dm_zone *zone;
|
||||
struct bio *bio;
|
||||
refcount_t ref;
|
||||
blk_status_t status;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
|
|||
{
|
||||
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
|
||||
|
||||
if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
|
||||
bioctx->status = status;
|
||||
bio_endio(bio);
|
||||
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
|
||||
bio->bi_status = status;
|
||||
|
||||
if (refcount_dec_and_test(&bioctx->ref)) {
|
||||
struct dm_zone *zone = bioctx->zone;
|
||||
|
||||
if (zone) {
|
||||
if (bio->bi_status != BLK_STS_OK &&
|
||||
bio_op(bio) == REQ_OP_WRITE &&
|
||||
dmz_is_seq(zone))
|
||||
set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
|
||||
dmz_deactivate_zone(zone);
|
||||
}
|
||||
bio_endio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Partial clone read BIO completion callback. This terminates the
|
||||
* Completion callback for an internally cloned target BIO. This terminates the
|
||||
* target BIO when there are no more references to its context.
|
||||
*/
|
||||
static void dmz_read_bio_end_io(struct bio *bio)
|
||||
static void dmz_clone_endio(struct bio *clone)
|
||||
{
|
||||
struct dmz_bioctx *bioctx = bio->bi_private;
|
||||
blk_status_t status = bio->bi_status;
|
||||
struct dmz_bioctx *bioctx = clone->bi_private;
|
||||
blk_status_t status = clone->bi_status;
|
||||
|
||||
bio_put(bio);
|
||||
bio_put(clone);
|
||||
dmz_bio_endio(bioctx->bio, status);
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue a BIO to a zone. The BIO may only partially process the
|
||||
* Issue a clone of a target BIO. The clone may only partially process the
|
||||
* original target BIO.
|
||||
*/
|
||||
static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
|
||||
struct bio *bio, sector_t chunk_block,
|
||||
unsigned int nr_blocks)
|
||||
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
|
||||
struct bio *bio, sector_t chunk_block,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
|
||||
sector_t sector;
|
||||
struct bio *clone;
|
||||
|
||||
/* BIO remap sector */
|
||||
sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
|
||||
|
||||
/* If the read is not partial, there is no need to clone the BIO */
|
||||
if (nr_blocks == dmz_bio_blocks(bio)) {
|
||||
/* Setup and submit the BIO */
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
refcount_inc(&bioctx->ref);
|
||||
generic_make_request(bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Partial BIO: we need to clone the BIO */
|
||||
clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
|
||||
if (!clone)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Setup the clone */
|
||||
clone->bi_iter.bi_sector = sector;
|
||||
bio_set_dev(clone, dmz->dev->bdev);
|
||||
clone->bi_iter.bi_sector =
|
||||
dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
|
||||
clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
|
||||
clone->bi_end_io = dmz_read_bio_end_io;
|
||||
clone->bi_end_io = dmz_clone_endio;
|
||||
clone->bi_private = bioctx;
|
||||
|
||||
bio_advance(bio, clone->bi_iter.bi_size);
|
||||
|
||||
/* Submit the clone */
|
||||
refcount_inc(&bioctx->ref);
|
||||
generic_make_request(clone);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
|
||||
zone->wp_block += nr_blocks;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
|
|||
if (nr_blocks) {
|
||||
/* Valid blocks found: read them */
|
||||
nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
|
||||
ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
|
||||
ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
chunk_block += nr_blocks;
|
||||
|
@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue a write BIO to a zone.
|
||||
*/
|
||||
static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
|
||||
struct bio *bio, sector_t chunk_block,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
|
||||
|
||||
/* Setup and submit the BIO */
|
||||
bio_set_dev(bio, dmz->dev->bdev);
|
||||
bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
|
||||
refcount_inc(&bioctx->ref);
|
||||
generic_make_request(bio);
|
||||
|
||||
if (dmz_is_seq(zone))
|
||||
zone->wp_block += nr_blocks;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write blocks directly in a data zone, at the write pointer.
|
||||
* If a buffer zone is assigned, invalidate the blocks written
|
||||
|
@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
|
|||
return -EROFS;
|
||||
|
||||
/* Submit write */
|
||||
dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
|
||||
ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Validate the blocks in the data zone and invalidate
|
||||
|
@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
|
|||
return -EROFS;
|
||||
|
||||
/* Submit write */
|
||||
dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
|
||||
ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Validate the blocks in the buffer zone
|
||||
|
@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
|
|||
bioctx->zone = NULL;
|
||||
bioctx->bio = bio;
|
||||
refcount_set(&bioctx->ref, 1);
|
||||
bioctx->status = BLK_STS_OK;
|
||||
|
||||
/* Set the BIO pending in the flush list */
|
||||
if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
|
||||
|
@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
|
|||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Completed target BIO processing.
|
||||
*/
|
||||
static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
|
||||
{
|
||||
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
|
||||
|
||||
if (bioctx->status == BLK_STS_OK && *error)
|
||||
bioctx->status = *error;
|
||||
|
||||
if (!refcount_dec_and_test(&bioctx->ref))
|
||||
return DM_ENDIO_INCOMPLETE;
|
||||
|
||||
/* Done */
|
||||
bio->bi_status = bioctx->status;
|
||||
|
||||
if (bioctx->zone) {
|
||||
struct dm_zone *zone = bioctx->zone;
|
||||
|
||||
if (*error && bio_op(bio) == REQ_OP_WRITE) {
|
||||
if (dmz_is_seq(zone))
|
||||
set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
|
||||
}
|
||||
dmz_deactivate_zone(zone);
|
||||
}
|
||||
|
||||
return DM_ENDIO_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get zoned device information.
|
||||
*/
|
||||
|
@ -946,7 +901,6 @@ static struct target_type dmz_type = {
|
|||
.ctr = dmz_ctr,
|
||||
.dtr = dmz_dtr,
|
||||
.map = dmz_map,
|
||||
.end_io = dmz_end_io,
|
||||
.io_hints = dmz_io_hints,
|
||||
.prepare_ioctl = dmz_prepare_ioctl,
|
||||
.postsuspend = dmz_suspend,
|
||||
|
|
|
@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
|
|||
return ret;
|
||||
}
|
||||
|
||||
blk_queue_split(md->queue, &bio);
|
||||
|
||||
init_clone_info(&ci, md, map, bio);
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
|
|
Loading…
Reference in New Issue