A few fixes for problems discovered during the 3.6 cycle.

Of particular note are fixes to the thin target's discard support,
 which I hope is finally working correctly, and fixes for multipath
 ioctls and device limits when there are no paths.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJQY5sSAAoJEK2W1qbAHj1nUKwP/RDEYw6o4WWvnMvMtyhapPaM
 yInBlep/TSr4mA7QPoe0zV9G8guZoZZaeGEgP/T4Je//ZBqw1xhEG6RIVEAgEUh/
 GrJvfYGXYoLhUiCg99vR1oWT5hQvQp8SYG9lJ1+AsjqEgVwBj7uKgl2wmmvars9X
 gPzXqqzv2IlQjhu6eLvShUixk4HFTQfPMDGaPGWWV8nNcWc0Pnb5TVZiuMeNJGrf
 Srem0ScRNF6P9stUqMA93kHp2KRKHwP6kelnuok9CW/RfcNnux1+8015DXcdbOr9
 X1+mi6VIL0Hjp5R/io0FE1YdJDyR6U/Rwjo3jHkblnegRMOMnK3bOTHmhepW/HUe
 Mav9gcXvEXNpqEvQJsaRmhR36ZgJan5mpxaSTeK1HcPuP0wePEN9Lh/ZJDY7oaB1
 33ntNV8LFIj4jXOcIJZkyAf9l/RdI7mAZ4HwNxPiNncG7LSNataguKYA1sZw9/E8
 njBbn9PyDl/arXQVCJa5ARa2hOHqtNViNGqqNVjQ6ySJuz1HgzslXqzPVG/geZQd
 yPs3ylkMNl+vbCZaEDwkuuEpOeiMgNo1BxVuhGuJMIe5Fs1lsjWbUnvwT9a0XsCQ
 fDPFAFZOfb3Xn6AV0za1SyIVgvsHoX8COBViPh8m+PaXgyTB2wf+vkgRMgwAhBRR
 IV5v+oWZSL8ayoe5okEv
 =J1f2
 -----END PGP SIGNATURE-----

Merge tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull dm fixes from Alasdair G Kergon:
 "A few fixes for problems discovered during the 3.6 cycle.

  Of particular note are fixes to the thin target's discard support,
  which I hope is finally working correctly, and fixes for multipath
  ioctls and device limits when there are no paths."

* tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm verity: fix overflow check
  dm thin: fix discard support for data devices
  dm thin: tidy discard support
  dm: retain table limits when swapping to new table with no devices
  dm table: clear add_random unless all devices have it set
  dm: handle requests beyond end of device instead of using BUG_ON
  dm mpath: only retry ioctl when no paths if queue_if_no_path set
  dm thin: do not set discard_zeroes_data
This commit is contained in:
Linus Torvalds 2012-09-28 10:00:01 -07:00
commit c3a086e638
6 changed files with 209 additions and 78 deletions

View File

@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
unsigned long arg)
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
struct block_device *bdev;
fmode_t mode;
unsigned long flags;
@ -1570,12 +1571,14 @@ again:
if (!m->current_pgpath)
__choose_pgpath(m, 0);
if (m->current_pgpath) {
bdev = m->current_pgpath->path.dev->bdev;
mode = m->current_pgpath->path.dev->mode;
pgpath = m->current_pgpath;
if (pgpath) {
bdev = pgpath->path.dev->bdev;
mode = pgpath->path.dev->mode;
}
if (m->queue_io)
if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
r = -EAGAIN;
else if (!bdev)
r = -EIO;

View File

@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
return &t->targets[(KEYS_PER_NODE * n) + k];
}
/*
 * iterate_devices callout: bump the unsigned counter that @data points to
 * once per device visited.  Always returns 0.
 */
static int count_device(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned *counter = data;

	*counter += 1;

	return 0;
}
/*
* Check whether a table has no data devices attached using each
* target's iterate_devices method.
* Returns false if the result is unknown because a target doesn't
* support iterate_devices.
*/
bool dm_table_has_no_data_devices(struct dm_table *table)
{
struct dm_target *uninitialized_var(ti);
unsigned i = 0, num_devices = 0;
while (i < dm_table_get_num_targets(table)) {
ti = dm_table_get_target(table, i++);
if (!ti->type->iterate_devices)
return false;
ti->type->iterate_devices(ti, count_device, &num_devices);
if (num_devices)
return false;
}
return true;
}
/*
* Establish the new table's queue_limits and validate them.
*/
@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
return q && blk_queue_nonrot(q);
}
static bool dm_table_is_nonrot(struct dm_table *t)
/*
 * iterate_devices callout: returns 1 when @dev has a request queue and
 * that queue does not have add_random set, 0 otherwise.
 */
static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct request_queue *queue = bdev_get_queue(dev->bdev);

	if (!queue)
		return 0;

	return !blk_queue_add_random(queue);
}
static bool dm_table_all_devices_attribute(struct dm_table *t,
iterate_devices_callout_fn func)
{
struct dm_target *ti;
unsigned i = 0;
/* Ensure that all underlying device are non-rotational. */
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (!ti->type->iterate_devices ||
!ti->type->iterate_devices(ti, device_is_nonrot, NULL))
!ti->type->iterate_devices(ti, func, NULL))
return 0;
}
@ -1396,13 +1439,23 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_discard_zeroes_data(t))
q->limits.discard_zeroes_data = 0;
if (dm_table_is_nonrot(t))
/* Ensure that all underlying devices are non-rotational. */
if (dm_table_all_devices_attribute(t, device_is_nonrot))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
dm_table_set_integrity(t);
/*
* Determine whether or not this queue's I/O timings contribute
* to the entropy pool.  Only request-based targets use this.
* Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
* have it set.
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
/*
* QUEUE_FLAG_STACKABLE must be set after all queue settings are
* visible to other CPUs because, once the flag is set, incoming bios

View File

@ -509,9 +509,9 @@ enum pool_mode {
struct pool_features {
enum pool_mode mode;
unsigned zero_new_blocks:1;
unsigned discard_enabled:1;
unsigned discard_passdown:1;
bool zero_new_blocks:1;
bool discard_enabled:1;
bool discard_passdown:1;
};
struct thin_c;
@ -580,7 +580,8 @@ struct pool_c {
struct dm_target_callbacks callbacks;
dm_block_t low_water_blocks;
struct pool_features pf;
struct pool_features requested_pf; /* Features requested during table load */
struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */
};
/*
@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool)
/*----------------------------------------------------------------
* Binding of control targets to a pool object
*--------------------------------------------------------------*/
static bool data_dev_supports_discard(struct pool_c *pt)
{
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
return q && blk_queue_discard(q);
}
/*
 * If discard_passdown was enabled verify that the data device can take
 * pool-block-sized discards.  Disable discard_passdown in
 * pt->adjusted_pf, with a warning naming the reason, if not.
 *
 * Passdown is disabled when the data device:
 *  - does not support discards,
 *  - has max_discard_sectors smaller than a pool block,
 *  - has a discard granularity larger than a pool block, or
 *  - has a discard granularity that does not evenly divide the pool
 *    block size.
 */
static void disable_passdown_if_not_supported(struct pool_c *pt)
{
	struct pool *pool = pt->pool;
	struct block_device *data_bdev = pt->data_dev->bdev;
	struct queue_limits *data_limits;
	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
	sector_t dividend = block_size;	/* sector_div() modifies its first argument */
	const char *reason = NULL;
	char buf[BDEVNAME_SIZE];

	if (!pt->adjusted_pf.discard_passdown)
		return;

	if (!data_dev_supports_discard(pt)) {
		reason = "discard unsupported";
	} else {
		/*
		 * Only look at the queue limits once the support check has
		 * confirmed that the data device has a request queue.
		 */
		data_limits = &bdev_get_queue(data_bdev)->limits;

		if (data_limits->max_discard_sectors < pool->sectors_per_block)
			reason = "max discard sectors smaller than a block";

		else if (data_limits->discard_granularity > block_size)
			reason = "discard granularity larger than a block";

		/*
		 * Use a real remainder rather than a (granularity - 1) mask:
		 * the mask is only a valid divisibility test when the
		 * granularity is a power of two.  A granularity of zero is
		 * treated as "not a factor" and must not reach the division.
		 */
		else if (!data_limits->discard_granularity ||
			 sector_div(dividend, data_limits->discard_granularity))
			reason = "discard granularity not a factor of block size";
	}

	if (reason) {
		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
		pt->adjusted_pf.discard_passdown = false;
	}
}
static int bind_control_target(struct pool *pool, struct dm_target *ti)
{
struct pool_c *pt = ti->private;
@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
* We want to make sure that degraded pools are never upgraded.
*/
enum pool_mode old_mode = pool->pf.mode;
enum pool_mode new_mode = pt->pf.mode;
enum pool_mode new_mode = pt->adjusted_pf.mode;
if (old_mode > new_mode)
new_mode = old_mode;
pool->ti = ti;
pool->low_water_blocks = pt->low_water_blocks;
pool->pf = pt->pf;
set_pool_mode(pool, new_mode);
pool->pf = pt->adjusted_pf;
/*
* If discard_passdown was enabled verify that the data device
* supports discards. Disable discard_passdown if not; otherwise
* -EOPNOTSUPP will be returned.
*/
/* FIXME: pull this out into a sep fn. */
if (pt->pf.discard_passdown) {
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
if (!q || !blk_queue_discard(q)) {
char buf[BDEVNAME_SIZE];
DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
bdevname(pt->data_dev->bdev, buf));
pool->pf.discard_passdown = 0;
}
}
set_pool_mode(pool, new_mode);
return 0;
}
@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
static void pool_features_init(struct pool_features *pf)
{
pf->mode = PM_WRITE;
pf->zero_new_blocks = 1;
pf->discard_enabled = 1;
pf->discard_passdown = 1;
pf->zero_new_blocks = true;
pf->discard_enabled = true;
pf->discard_passdown = true;
}
static void __pool_destroy(struct pool *pool)
@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
argc--;
if (!strcasecmp(arg_name, "skip_block_zeroing"))
pf->zero_new_blocks = 0;
pf->zero_new_blocks = false;
else if (!strcasecmp(arg_name, "ignore_discard"))
pf->discard_enabled = 0;
pf->discard_enabled = false;
else if (!strcasecmp(arg_name, "no_discard_passdown"))
pf->discard_passdown = 0;
pf->discard_passdown = false;
else if (!strcasecmp(arg_name, "read_only"))
pf->mode = PM_READ_ONLY;
@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
pt->metadata_dev = metadata_dev;
pt->data_dev = data_dev;
pt->low_water_blocks = low_water_blocks;
pt->pf = pf;
pt->adjusted_pf = pt->requested_pf = pf;
ti->num_flush_requests = 1;
/*
* Only need to enable discards if the pool should pass
* them down to the data device. The thin device's discard
@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
*/
if (pf.discard_enabled && pf.discard_passdown) {
ti->num_discard_requests = 1;
/*
* Setting 'discards_supported' circumvents the normal
* stacking of discard limits (this keeps the pool and
* thin devices' discard limits consistent).
*/
ti->discards_supported = true;
ti->discard_zeroes_data_unsupported = true;
}
ti->private = pt;
@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
(unsigned long)pool->sectors_per_block,
(unsigned long long)pt->low_water_blocks);
emit_flags(&pt->pf, result, sz, maxlen);
emit_flags(&pt->requested_pf, result, sz, maxlen);
break;
}
@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
{
/*
* FIXME: these limits may be incompatible with the pool's data device
*/
struct pool *pool = pt->pool;
struct queue_limits *data_limits;
limits->max_discard_sectors = pool->sectors_per_block;
/*
* This is just a hint, and not enforced. We have to cope with
* bios that cover a block partially. A discard that spans a block
* boundary is not sent to this target.
* discard_granularity is just a hint, and not enforced.
*/
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
limits->discard_zeroes_data = pool->pf.zero_new_blocks;
if (pt->adjusted_pf.discard_passdown) {
data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
limits->discard_granularity = data_limits->discard_granularity;
} else
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
}
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, 0);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
if (pool->pf.discard_enabled)
set_discard_limits(pool, limits);
/*
* pt->adjusted_pf is a staging area for the actual features to use.
* They get transferred to the live pool in bind_control_target()
* called from pool_preresume().
*/
if (!pt->adjusted_pf.discard_enabled)
return;
disable_passdown_if_not_supported(pt);
set_discard_limits(pt, limits);
}
static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 3, 0},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
return 0;
}
/*
* A thin device always inherits its queue limits from its pool.
*/
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct thin_c *tc = ti->private;
struct pool *pool = tc->pool;
blk_limits_io_min(limits, 0);
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
set_discard_limits(pool, limits);
*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
}
static struct target_type thin_target = {
.name = "thin",
.version = {1, 3, 0},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,

View File

@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
v->hash_dev_block_bits = ffs(num) - 1;
if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) !=
(sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) {
(sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
>> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid data blocks";
r = -EINVAL;
goto bad;
@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) !=
(sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) {
(sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
>> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid hash start";
r = -EINVAL;
goto bad;

View File

@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
struct dm_rq_target_io *tio = clone->end_io_data;
dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
dm_request_endio_fn rq_end_io = NULL;
if (mapped && rq_end_io)
r = rq_end_io(tio->ti, clone, error, &tio->info);
if (tio->ti) {
rq_end_io = tio->ti->type->rq_end_io;
if (mapped && rq_end_io)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
if (r <= 0)
/* The target wants to complete the I/O */
@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
int r, requeued = 0;
struct dm_rq_target_io *tio = clone->end_io_data;
/*
* Hold the md reference here for the in-flight I/O.
* We can't rely on the reference count by device opener,
* because the device may be closed during the request completion
* when all bios are completed.
* See the comment in rq_completed() too.
*/
dm_get(md);
tio->ti = ti;
r = ti->type->map_rq(ti, clone, &tio->info);
switch (r) {
@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
return requeued;
}
/*
 * Start the original request @orig and perform the per-request accounting
 * for @md: bump the pending counter for the clone's data direction and
 * take a reference on @md.
 *
 * Returns the clone found in orig->special.
 */
static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
{
struct request *clone;
blk_start_request(orig);
/* The clone is read from the original request's ->special field. */
clone = orig->special;
atomic_inc(&md->pending[rq_data_dir(clone)]);
/*
* Hold the md reference here for the in-flight I/O.
* We can't rely on the reference count by device opener,
* because the device may be closed during the request completion
* when all bios are completed.
* See the comment in rq_completed() too.
*/
dm_get(md);
return clone;
}
/*
* q->request_fn for request-based dm.
* Called with the queue lock held.
@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q)
pos = blk_rq_pos(rq);
ti = dm_table_find_target(map, pos);
BUG_ON(!dm_target_is_valid(ti));
if (!dm_target_is_valid(ti)) {
/*
* Must perform setup, that dm_done() requires,
* before calling dm_kill_unmapped_request
*/
DMERR_LIMIT("request attempted access beyond the end of device");
clone = dm_start_request(md, rq);
dm_kill_unmapped_request(clone, -EIO);
continue;
}
if (ti->type->busy && ti->type->busy(ti))
goto delay_and_out;
blk_start_request(rq);
clone = rq->special;
atomic_inc(&md->pending[rq_data_dir(clone)]);
clone = dm_start_request(md, rq);
spin_unlock(q->queue_lock);
if (map_request(ti, clone, md))
@ -1684,8 +1706,6 @@ delay_and_out:
blk_delay_queue(q, HZ / 10);
out:
dm_table_put(map);
return;
}
int dm_underlying_device_busy(struct request_queue *q)
@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md)
*/
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
struct dm_table *map = ERR_PTR(-EINVAL);
struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
struct queue_limits limits;
int r;
@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
if (!dm_suspended_md(md))
goto out;
/*
* If the new table has no data devices, retain the existing limits.
* This helps multipath with queue_if_no_path if all paths disappear,
* then new I/O is queued based on these limits, and then some paths
* reappear.
*/
if (dm_table_has_no_data_devices(table)) {
live_map = dm_get_live_table(md);
if (live_map)
limits = md->queue->limits;
dm_table_put(live_map);
}
r = dm_calculate_queue_limits(table, &limits);
if (r) {
map = ERR_PTR(r);

View File

@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits);
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,