diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d8abb90a6c2f..034233eefc82 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct multipath *m = ti->private; + struct pgpath *pgpath; struct block_device *bdev; fmode_t mode; unsigned long flags; @@ -1570,12 +1571,14 @@ again: if (!m->current_pgpath) __choose_pgpath(m, 0); - if (m->current_pgpath) { - bdev = m->current_pgpath->path.dev->bdev; - mode = m->current_pgpath->path.dev->mode; + pgpath = m->current_pgpath; + + if (pgpath) { + bdev = pgpath->path.dev->bdev; + mode = pgpath->path.dev->mode; } - if (m->queue_io) + if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) r = -EAGAIN; else if (!bdev) r = -EIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f90069029aae..100368eb7991 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +static int count_device(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned *num_devices = data; + + (*num_devices)++; + + return 0; +} + +/* + * Check whether a table has no data devices attached using each + * target's iterate_devices method. + * Returns false if the result is unknown because a target doesn't + * support iterate_devices. + */ +bool dm_table_has_no_data_devices(struct dm_table *table) +{ + struct dm_target *uninitialized_var(ti); + unsigned i = 0, num_devices = 0; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (!ti->type->iterate_devices) + return false; + + ti->type->iterate_devices(ti, count_device, &num_devices); + if (num_devices) + return false; + } + + return true; +} + /* * Establish the new table's queue_limits and validate them. */ @@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, return q && blk_queue_nonrot(q); } -static bool dm_table_is_nonrot(struct dm_table *t) +static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_add_random(q); +} + +static bool dm_table_all_devices_attribute(struct dm_table *t, + iterate_devices_callout_fn func) { struct dm_target *ti; unsigned i = 0; - /* Ensure that all underlying device are non-rotational. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) + !ti->type->iterate_devices(ti, func, NULL)) return 0; } @@ -1396,13 +1439,23 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_discard_zeroes_data(t)) q->limits.discard_zeroes_data = 0; - if (dm_table_is_nonrot(t)) + /* Ensure that all underlying devices are non-rotational. */ + if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); dm_table_set_integrity(t); + /* + * Determine whether or not this queue's I/O timings contribute + * to the entropy pool, Only request-based targets use this. + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ + if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + /* * QUEUE_FLAG_STACKABLE must be set after all queue settings are * visible to other CPUs because, once the flag is set, incoming bios diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index af1fc3b2c2ad..c29410af1e22 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -509,9 +509,9 @@ enum pool_mode { struct pool_features { enum pool_mode mode; - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; + bool zero_new_blocks:1; + bool discard_enabled:1; + bool discard_passdown:1; }; struct thin_c; @@ -580,7 +580,8 @@ struct pool_c { struct dm_target_callbacks callbacks; dm_block_t low_water_blocks; - struct pool_features pf; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ }; /* @@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ +static bool data_dev_supports_discard(struct pool_c *pt) +{ + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the data device + * supports discards. Disable discard_passdown if not. + */ +static void disable_passdown_if_not_supported(struct pool_c *pt) +{ + struct pool *pool = pt->pool; + struct block_device *data_bdev = pt->data_dev->bdev; + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!pt->adjusted_pf.discard_passdown) + return; + + if (!data_dev_supports_discard(pt)) + reason = "discard unsupported"; + + else if (data_limits->max_discard_sectors < pool->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + else if (data_limits->discard_granularity > block_size) + reason = "discard granularity larger than a block"; + + else if (block_size & (data_limits->discard_granularity - 1)) + reason = "discard granularity not a factor of block size"; + + if (reason) { + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + pt->adjusted_pf.discard_passdown = false; + } +} + static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; @@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) * We want to make sure that degraded pools are never upgraded. */ enum pool_mode old_mode = pool->pf.mode; - enum pool_mode new_mode = pt->pf.mode; + enum pool_mode new_mode = pt->adjusted_pf.mode; if (old_mode > new_mode) new_mode = old_mode; pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; - set_pool_mode(pool, new_mode); + pool->pf = pt->adjusted_pf; - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - /* FIXME: pull this out into a sep fn. */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } + set_pool_mode(pool, new_mode); return 0; } @@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) static void pool_features_init(struct pool_features *pf) { pf->mode = PM_WRITE; - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; + pf->zero_new_blocks = true; + pf->discard_enabled = true; + pf->discard_passdown = true; } static void __pool_destroy(struct pool *pool) @@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, argc--; if (!strcasecmp(arg_name, "skip_block_zeroing")) - pf->zero_new_blocks = 0; + pf->zero_new_blocks = false; else if (!strcasecmp(arg_name, "ignore_discard")) - pf->discard_enabled = 0; + pf->discard_enabled = false; else if (!strcasecmp(arg_name, "no_discard_passdown")) - pf->discard_passdown = 0; + pf->discard_passdown = false; else if (!strcasecmp(arg_name, "read_only")) pf->mode = PM_READ_ONLY; @@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->metadata_dev = metadata_dev; pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; - pt->pf = pf; + pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_requests = 1; + /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) */ if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_requests = 1; + /* * Setting 'discards_supported' circumvents the normal * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - emit_flags(&pt->pf, result, sz, maxlen); + emit_flags(&pt->requested_pf, result, sz, maxlen); break; } @@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { - /* - * FIXME: these limits may be incompatible with the pool's data device - */ + struct pool *pool = pt->pool; + struct queue_limits *data_limits; + limits->max_discard_sectors = pool->sectors_per_block; /* - * This is just a hint, and not enforced. We have to cope with - * bios that cover a block partially. A discard that spans a block - * boundary is not sent to this target. + * discard_granularity is just a hint, and not enforced. */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; + limits->discard_granularity = data_limits->discard_granularity; + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); + + /* + * pt->adjusted_pf is a staging area for the actual features to use. + * They get transferred to the live pool in bind_control_target() + * called from pool_preresume(). + */ + if (!pt->adjusted_pf.discard_enabled) + return; + + disable_passdown_if_not_supported(pt); + + set_discard_limits(pt, limits); } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +/* + * A thin device always inherits its queue limits from its pool. + */ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; } static struct target_type thin_target = { .name = "thin", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 254d19268ad2..892ae2766aa6 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->hash_dev_block_bits = ffs(num) - 1; if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid data blocks"; r = -EINVAL; goto bad; @@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid hash start"; r = -EINVAL; goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e09b6ff5b49..67ffa391edcf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped) { int r = error; struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; + dm_request_endio_fn rq_end_io = NULL; - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); + if (tio->ti) { + rq_end_io = tio->ti->type->rq_end_io; + + if (mapped && rq_end_io) + r = rq_end_io(tio->ti, clone, error, &tio->info); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone, int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); - tio->ti = ti; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { @@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone, return requeued; } +static struct request *dm_start_request(struct mapped_device *md, struct request *orig) +{ + struct request *clone; + + blk_start_request(orig); + clone = orig->special; + atomic_inc(&md->pending[rq_data_dir(clone)]); + + /* + * Hold the md reference here for the in-flight I/O. + * We can't rely on the reference count by device opener, + * because the device may be closed during the request completion + * when all bios are completed. + * See the comment in rq_completed() too. + */ + dm_get(md); + + return clone; +} + /* * q->request_fn for request-based dm. * Called with the queue lock held. @@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); ti = dm_table_find_target(map, pos); - BUG_ON(!dm_target_is_valid(ti)); + if (!dm_target_is_valid(ti)) { + /* + * Must perform setup, that dm_done() requires, + * before calling dm_kill_unmapped_request + */ + DMERR_LIMIT("request attempted access beyond the end of device"); + clone = dm_start_request(md, rq); + dm_kill_unmapped_request(clone, -EIO); + continue; + } if (ti->type->busy && ti->type->busy(ti)) goto delay_and_out; - blk_start_request(rq); - clone = rq->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); + clone = dm_start_request(md, rq); spin_unlock(q->queue_lock); if (map_request(ti, clone, md)) @@ -1684,8 +1706,6 @@ delay_and_out: blk_delay_queue(q, HZ / 10); out: dm_table_put(map); - - return; } int dm_underlying_device_busy(struct request_queue *q) @@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *map = ERR_PTR(-EINVAL); + struct dm_table *live_map, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) if (!dm_suspended_md(md)) goto out; + /* + * If the new table has no data devices, retain the existing limits. + * This helps multipath with queue_if_no_path if all paths disappear, + * then new I/O is queued based on these limits, and then some paths + * reappear. + */ + if (dm_table_has_no_data_devices(table)) { + live_map = dm_get_live_table(md); + if (live_map) + limits = md->queue->limits; + dm_table_put(live_map); + } + r = dm_calculate_queue_limits(table, &limits); if (r) { map = ERR_PTR(r); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 52eef493d266..6a99fefaa743 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); +bool dm_table_has_no_data_devices(struct dm_table *table); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,