A set of device-mapper fixes for 3.12.
A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error handling fixes for dm-multipath. A fix for the thin provisioning target to not expose non-zero discard limits if discards are disabled. Lastly, add two DM module parameters which allow users to tune the emergency memory reserves that DM maintains per device -- this helps fix a long-standing issue for dm-multipath. The conservative default reserve for request-based dm-multipath devices (256) has proven problematic for users with many multipathed SCSI devices but relatively little memory. To responsibly select a smaller value users should use the new nr_bios tracepoint info (via commit 75afb352
"block: Add nr_bios to block_rq_remap tracepoint") to determine the peak number of bios their workloads create. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.14 (GNU/Linux) iQEcBAABAgAGBQJSQMVHAAoJEMUj8QotnQNaOXgIAJS6/XJKMoHfiDJ9M+XD34rZ Uyr9TEnubX3DKCRBiY23MUcCQn3fx6BjCGv5/c8L4jQFIuLyDi2yatqpwXcbGSJh G/S/y6u0Axek+ew7TS80OFop4nblW6MoKnoh9/4N55Ofa+1WvKM4ERUGjHGbauyS TxmLQPToCFPLYRIOZ+imd6hQuIZ1+FFdJFvi7kY9O6Llx2sLD6fWi1iruBd/Da2H ByMX3biGN45mSpcBzRbSC/FkJ9CRIvT9n82BDPS0o3Tllt8NaVlEDaovB7h4ncc0 bFuT2Z3Q38B9uZ8Lj0bqdGzv3kXMLCkLo6WhWjyUt84hmDPAzRpBwt60jUqWyZs= =bjVp -----END PGP SIGNATURE----- Merge tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device-mapper fixes from Mike Snitzer: "A few fixes for dm-snapshot, a 32 bit fix for dm-stats, a couple error handling fixes for dm-multipath. A fix for the thin provisioning target to not expose non-zero discard limits if discards are disabled. Lastly, add two DM module parameters which allow users to tune the emergency memory reserves that DM maintains per device -- this helps fix a long-standing issue for dm-multipath. The conservative default reserve for request-based dm-multipath devices (256) has proven problematic for users with many multipathed SCSI devices but relatively little memory. To responsibly select a smaller value users should use the new nr_bios tracepoint info (via commit 75afb352
"block: Add nr_bios to block_rq_remap tracepoint") to determine the peak number of bios their workloads create" * tag 'dm-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: add reserved_bio_based_ios module parameter dm: add reserved_rq_based_ios module parameter dm: lower bio-based mempool reservation dm thin: do not expose non-zero discard limits if discards disabled dm mpath: disable WRITE SAME if it fails dm-snapshot: fix performance degradation due to small hash size dm snapshot: workaround for a false positive lockdep warning dm stats: fix possible counter corruption on 32-bit systems dm mpath: do not fail path on -ENOSPC
This commit is contained in:
commit
e93dd910b9
|
@ -19,8 +19,6 @@
|
|||
#define DM_MSG_PREFIX "io"
|
||||
|
||||
#define DM_IO_MAX_REGIONS BITS_PER_LONG
|
||||
#define MIN_IOS 16
|
||||
#define MIN_BIOS 16
|
||||
|
||||
struct dm_io_client {
|
||||
mempool_t *pool;
|
||||
|
@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
|
|||
struct dm_io_client *dm_io_client_create(void)
|
||||
{
|
||||
struct dm_io_client *client;
|
||||
unsigned min_ios = dm_get_reserved_bio_based_ios();
|
||||
|
||||
client = kmalloc(sizeof(*client), GFP_KERNEL);
|
||||
if (!client)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
|
||||
client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
|
||||
if (!client->pool)
|
||||
goto bad;
|
||||
|
||||
client->bios = bioset_create(MIN_BIOS, 0);
|
||||
client->bios = bioset_create(min_ios, 0);
|
||||
if (!client->bios)
|
||||
goto bad;
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <linux/device-mapper.h>
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-path-selector.h"
|
||||
#include "dm-uevent.h"
|
||||
|
||||
|
@ -116,8 +117,6 @@ struct dm_mpath_io {
|
|||
|
||||
typedef int (*action_fn) (struct pgpath *pgpath);
|
||||
|
||||
#define MIN_IOS 256 /* Mempool size */
|
||||
|
||||
static struct kmem_cache *_mpio_cache;
|
||||
|
||||
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
|
||||
|
@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
|
|||
static struct multipath *alloc_multipath(struct dm_target *ti)
|
||||
{
|
||||
struct multipath *m;
|
||||
unsigned min_ios = dm_get_reserved_rq_based_ios();
|
||||
|
||||
m = kzalloc(sizeof(*m), GFP_KERNEL);
|
||||
if (m) {
|
||||
|
@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
|
|||
INIT_WORK(&m->trigger_event, trigger_event);
|
||||
init_waitqueue_head(&m->pg_init_wait);
|
||||
mutex_init(&m->work_mutex);
|
||||
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
|
||||
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
|
||||
if (!m->mpio_pool) {
|
||||
kfree(m);
|
||||
return NULL;
|
||||
|
@ -1268,6 +1268,7 @@ static int noretry_error(int error)
|
|||
case -EREMOTEIO:
|
||||
case -EILSEQ:
|
||||
case -ENODATA:
|
||||
case -ENOSPC:
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
|
|||
if (!error && !clone->errors)
|
||||
return 0; /* I/O complete */
|
||||
|
||||
if (noretry_error(error))
|
||||
if (noretry_error(error)) {
|
||||
if ((clone->cmd_flags & REQ_WRITE_SAME) &&
|
||||
!clone->q->limits.max_write_same_sectors) {
|
||||
struct queue_limits *limits;
|
||||
|
||||
/* device doesn't really support WRITE SAME, disable it */
|
||||
limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
|
||||
limits->max_write_same_sectors = 0;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
if (mpio->pgpath)
|
||||
fail_path(mpio->pgpath);
|
||||
|
|
|
@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
|
|||
*/
|
||||
INIT_WORK_ONSTACK(&req.work, do_metadata);
|
||||
queue_work(ps->metadata_wq, &req.work);
|
||||
flush_work(&req.work);
|
||||
flush_workqueue(ps->metadata_wq);
|
||||
|
||||
return req.result;
|
||||
}
|
||||
|
|
|
@ -725,17 +725,16 @@ static int calc_max_buckets(void)
|
|||
*/
|
||||
static int init_hash_tables(struct dm_snapshot *s)
|
||||
{
|
||||
sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
|
||||
sector_t hash_size, cow_dev_size, max_buckets;
|
||||
|
||||
/*
|
||||
* Calculate based on the size of the original volume or
|
||||
* the COW volume...
|
||||
*/
|
||||
cow_dev_size = get_dev_size(s->cow->bdev);
|
||||
origin_dev_size = get_dev_size(s->origin->bdev);
|
||||
max_buckets = calc_max_buckets();
|
||||
|
||||
hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
|
||||
hash_size = cow_dev_size >> s->store->chunk_shift;
|
||||
hash_size = min(hash_size, max_buckets);
|
||||
|
||||
if (hash_size < 64)
|
||||
|
|
|
@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
|
|||
struct dm_stat_percpu *p;
|
||||
|
||||
/*
|
||||
* For strict correctness we should use local_irq_disable/enable
|
||||
* For strict correctness we should use local_irq_save/restore
|
||||
* instead of preempt_disable/enable.
|
||||
*
|
||||
* This is racy if the driver finishes bios from non-interrupt
|
||||
* context as well as from interrupt context or from more different
|
||||
* interrupts.
|
||||
* preempt_disable/enable is racy if the driver finishes bios
|
||||
* from non-interrupt context as well as from interrupt context
|
||||
* or from more different interrupts.
|
||||
*
|
||||
* However, the race only results in not counting some events,
|
||||
* so it is acceptable.
|
||||
* On 64-bit architectures the race only results in not counting some
|
||||
* events, so it is acceptable. On 32-bit architectures the race could
|
||||
* cause the counter going off by 2^32, so we need to do proper locking
|
||||
* there.
|
||||
*
|
||||
* part_stat_lock()/part_stat_unlock() have this race too.
|
||||
*/
|
||||
#if BITS_PER_LONG == 32
|
||||
unsigned long flags;
|
||||
local_irq_save(flags);
|
||||
#else
|
||||
preempt_disable();
|
||||
#endif
|
||||
p = &s->stat_percpu[smp_processor_id()][entry];
|
||||
|
||||
if (!end) {
|
||||
|
@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
|
|||
p->ticks[idx] += duration;
|
||||
}
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
local_irq_restore(flags);
|
||||
#else
|
||||
preempt_enable();
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
|
||||
|
|
|
@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
* them down to the data device. The thin device's discard
|
||||
* processing will cause mappings to be removed from the btree.
|
||||
*/
|
||||
ti->discard_zeroes_data_unsupported = true;
|
||||
if (pf.discard_enabled && pf.discard_passdown) {
|
||||
ti->num_discard_bios = 1;
|
||||
|
||||
|
@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
* thin devices' discard limits consistent).
|
||||
*/
|
||||
ti->discards_supported = true;
|
||||
ti->discard_zeroes_data_unsupported = true;
|
||||
}
|
||||
ti->private = pt;
|
||||
|
||||
|
@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|||
* They get transferred to the live pool in bind_control_target()
|
||||
* called from pool_preresume().
|
||||
*/
|
||||
if (!pt->adjusted_pf.discard_enabled)
|
||||
if (!pt->adjusted_pf.discard_enabled) {
|
||||
/*
|
||||
* Must explicitly disallow stacking discard limits otherwise the
|
||||
* block layer will stack them if pool's data device has support.
|
||||
* QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
|
||||
* user to see that, so make sure to set all discard limits to 0.
|
||||
*/
|
||||
limits->discard_granularity = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
disable_passdown_if_not_supported(pt);
|
||||
|
||||
|
@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
|
||||
|
||||
/* In case the pool supports discards, pass them on. */
|
||||
ti->discard_zeroes_data_unsupported = true;
|
||||
if (tc->pool->pf.discard_enabled) {
|
||||
ti->discards_supported = true;
|
||||
ti->num_discard_bios = 1;
|
||||
ti->discard_zeroes_data_unsupported = true;
|
||||
/* Discard bios must be split on a block boundary */
|
||||
ti->split_discard_bios = true;
|
||||
}
|
||||
|
|
|
@ -211,10 +211,55 @@ struct dm_md_mempools {
|
|||
struct bio_set *bs;
|
||||
};
|
||||
|
||||
#define MIN_IOS 256
|
||||
#define RESERVED_BIO_BASED_IOS 16
|
||||
#define RESERVED_REQUEST_BASED_IOS 256
|
||||
#define RESERVED_MAX_IOS 1024
|
||||
static struct kmem_cache *_io_cache;
|
||||
static struct kmem_cache *_rq_tio_cache;
|
||||
|
||||
/*
|
||||
* Bio-based DM's mempools' reserved IOs set by the user.
|
||||
*/
|
||||
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
|
||||
|
||||
/*
|
||||
* Request-based DM's mempools' reserved IOs set by the user.
|
||||
*/
|
||||
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
|
||||
|
||||
/*
 * Fetch a reserved-IOs tunable and sanitize it.
 *
 * @reserved_ios: the tunable to read (and possibly correct in place)
 * @def:          value substituted when the tunable reads as 0
 * @max:          upper bound substituted when the tunable exceeds it
 *
 * The tunable is sampled exactly once.  A value already within (0, max]
 * is returned untouched.  Otherwise the replacement (@def or @max) is
 * written back with cmpxchg(), so the store only lands if the tunable
 * still holds the sampled value; the cmpxchg() result is deliberately
 * ignored and the replacement is returned either way.  A replacement of
 * 0 is never stored: the sampled value is returned as-is in that case.
 */
static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
				      unsigned def, unsigned max)
{
	unsigned seen = ACCESS_ONCE(*reserved_ios);
	unsigned replacement;

	/* Common case: the tunable is already sane. */
	if (seen && seen <= max)
		return seen;

	replacement = seen ? max : def;
	if (!replacement)
		return seen;

	(void)cmpxchg(reserved_ios, seen, replacement);

	return replacement;
}
|
||||
|
||||
unsigned dm_get_reserved_bio_based_ios(void)
|
||||
{
|
||||
return __dm_get_reserved_ios(&reserved_bio_based_ios,
|
||||
RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
|
||||
|
||||
unsigned dm_get_reserved_rq_based_ios(void)
|
||||
{
|
||||
return __dm_get_reserved_ios(&reserved_rq_based_ios,
|
||||
RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
|
||||
|
||||
static int __init local_init(void)
|
||||
{
|
||||
int r = -ENOMEM;
|
||||
|
@ -2277,6 +2322,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
|
|||
return md->immutable_target_type;
|
||||
}
|
||||
|
||||
/*
|
||||
* The queue_limits are only valid as long as you have a reference
|
||||
* count on 'md'.
|
||||
*/
|
||||
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
|
||||
{
|
||||
BUG_ON(!atomic_read(&md->holders));
|
||||
return &md->queue->limits;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
|
||||
|
||||
/*
|
||||
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
|
||||
*/
|
||||
|
@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
|
|||
|
||||
if (type == DM_TYPE_BIO_BASED) {
|
||||
cachep = _io_cache;
|
||||
pool_size = 16;
|
||||
pool_size = dm_get_reserved_bio_based_ios();
|
||||
front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
|
||||
} else if (type == DM_TYPE_REQUEST_BASED) {
|
||||
cachep = _rq_tio_cache;
|
||||
pool_size = MIN_IOS;
|
||||
pool_size = dm_get_reserved_rq_based_ios();
|
||||
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
|
||||
/* per_bio_data_size is not used. See __bind_mempools(). */
|
||||
WARN_ON(per_bio_data_size != 0);
|
||||
} else
|
||||
goto out;
|
||||
|
||||
pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
|
||||
pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
|
||||
if (!pools->io_pool)
|
||||
goto out;
|
||||
|
||||
|
@ -2924,6 +2980,13 @@ module_exit(dm_exit);
|
|||
|
||||
module_param(major, uint, 0);
|
||||
MODULE_PARM_DESC(major, "The major number of the device mapper");
|
||||
|
||||
module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
|
||||
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
|
||||
|
||||
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
|
||||
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
|
||||
|
||||
MODULE_DESCRIPTION(DM_NAME " driver");
|
||||
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
|
@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
|
|||
/*
|
||||
* Helpers that are used by DM core
|
||||
*/
|
||||
unsigned dm_get_reserved_bio_based_ios(void);
|
||||
unsigned dm_get_reserved_rq_based_ios(void);
|
||||
|
||||
static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
|
||||
{
|
||||
return !maxlen || strlen(result) + 1 >= maxlen;
|
||||
|
|
|
@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
|
|||
union map_info *dm_get_mapinfo(struct bio *bio);
|
||||
union map_info *dm_get_rq_mapinfo(struct request *rq);
|
||||
|
||||
struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
|
||||
|
||||
/*
|
||||
* Geometry functions.
|
||||
*/
|
||||
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
|
||||
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
|
||||
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Functions for manipulating device-mapper tables.
|
||||
*---------------------------------------------------------------*/
|
||||
|
|
Loading…
Reference in New Issue