dm: submit stacked requests in irq enabled context
Switch to having request-based DM enqueue all prep'ed requests into work
processed by another thread. This allows request-based DM to invoke block
APIs that assume interrupt enabled context (e.g. blk_get_request) and is a
prerequisite for adding blk-mq support to request-based DM.

The new kernel thread is only initialized for request-based DM devices.

multipath_map() is now always called in interrupt enabled context, so the
multipath spinlock (m->lock) locking is changed to always disable interrupts.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
commit 2eb6e1e3aa (parent 1ae49ea2cf)
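For context, the deferral mechanism the patch builds on is the generic kthread_worker facility. The sketch below is illustrative only and is not part of the patch; it uses the pre-4.9 API spellings that appear in the diff (init_kthread_worker(), queue_kthread_work(), flush_kthread_worker(), kthread_worker_fn()), and the my_dev/my_req/my_work_fn names and the "kmywork" thread name are invented for the example.

#include <linux/kthread.h>
#include <linux/err.h>

/* Per-device state: a worker plus the task that runs it. */
struct my_dev {
	struct kthread_worker kworker;
	struct task_struct *kworker_task;
};

/* Per-request state: embed a kthread_work so container_of() can recover it. */
struct my_req {
	struct kthread_work work;
	/* ... per-request fields ... */
};

static void my_work_fn(struct kthread_work *work)
{
	struct my_req *req = container_of(work, struct my_req, work);

	/* Runs on the worker thread with interrupts enabled, so APIs that
	 * assume interrupt enabled context (e.g. blk_get_request) are safe
	 * to call from here. */
	(void)req;
}

static int my_dev_init(struct my_dev *dev)
{
	init_kthread_worker(&dev->kworker);
	dev->kworker_task = kthread_run(kthread_worker_fn, &dev->kworker,
					"kmywork");
	return PTR_ERR_OR_ZERO(dev->kworker_task);
}

static void my_dev_submit(struct my_dev *dev, struct my_req *req)
{
	/* Called from atomic context (e.g. under a queue lock with
	 * interrupts off): just hand the request to the worker and return. */
	init_kthread_work(&req->work, my_work_fn);
	queue_kthread_work(&dev->kworker, &req->work);
}

static void my_dev_exit(struct my_dev *dev)
{
	flush_kthread_worker(&dev->kworker);	/* drain queued work */
	kthread_stop(dev->kworker_task);	/* stop the worker thread */
}

The patch applies exactly this shape to request-based DM: the worker lives in struct mapped_device, the work item lives in struct dm_rq_target_io, and the work function is map_tio_request().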
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -384,12 +384,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
         struct multipath *m = (struct multipath *) ti->private;
         int r = DM_MAPIO_REQUEUE;
         size_t nr_bytes = blk_rq_bytes(clone);
-        unsigned long flags;
         struct pgpath *pgpath;
         struct block_device *bdev;
         struct dm_mpath_io *mpio;
 
-        spin_lock_irqsave(&m->lock, flags);
+        spin_lock_irq(&m->lock);
 
         /* Do we need to select a new pgpath? */
         if (!m->current_pgpath ||
@@ -411,21 +410,26 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
                 /* ENOMEM, requeue */
                 goto out_unlock;
 
-        bdev = pgpath->path.dev->bdev;
-        clone->q = bdev_get_queue(bdev);
-        clone->rq_disk = bdev->bd_disk;
-        clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
         mpio = map_context->ptr;
         mpio->pgpath = pgpath;
         mpio->nr_bytes = nr_bytes;
+
+        bdev = pgpath->path.dev->bdev;
+
+        clone->q = bdev_get_queue(bdev);
+        clone->rq_disk = bdev->bd_disk;
+        clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+
+        spin_unlock_irq(&m->lock);
+
         if (pgpath->pg->ps.type->start_io)
                 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
                                               &pgpath->path,
                                               nr_bytes);
-        r = DM_MAPIO_REMAPPED;
+        return DM_MAPIO_REMAPPED;
 
 out_unlock:
-        spin_unlock_irqrestore(&m->lock, flags);
+        spin_unlock_irq(&m->lock);
 
         return r;
 }
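A note on the locking change above (illustrative, not from the patch): spin_lock_irqsave() is required when the caller may already have interrupts disabled, because it saves and later restores the previous interrupt state, whereas spin_lock_irq() assumes interrupts are enabled on entry and simply disables and re-enables them. Since multipath_map() is now only reached from the kdmwork thread, interrupts are known to be enabled, so the cheaper variant is safe:

	/* Before: callable with interrupts possibly already disabled */
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);		/* save current irq state */
	/* ... critical section ... */
	spin_unlock_irqrestore(&m->lock, flags);	/* restore saved state */

	/* After: only called from process context with interrupts enabled */
	spin_lock_irq(&m->lock);	/* unconditionally disable interrupts */
	/* ... critical section ... */
	spin_unlock_irq(&m->lock);	/* unconditionally re-enable them */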
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
+#include <linux/kthread.h>
 
 #include <trace/events/block.h>
 
@@ -79,6 +80,7 @@ struct dm_rq_target_io {
         struct mapped_device *md;
         struct dm_target *ti;
         struct request *orig, *clone;
+        struct kthread_work work;
         int error;
         union map_info info;
 };
@@ -208,6 +210,9 @@ struct mapped_device {
         struct bio flush_bio;
 
         struct dm_stats stats;
+
+        struct kthread_worker kworker;
+        struct task_struct *kworker_task;
 };
 
 /*
@@ -1773,6 +1778,8 @@ static struct request *__clone_rq(struct request *rq, struct mapped_device *md,
         return clone;
 }
 
+static void map_tio_request(struct kthread_work *work);
+
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
                                 gfp_t gfp_mask)
 {
@@ -1789,6 +1796,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
         tio->orig = rq;
         tio->error = 0;
         memset(&tio->info, 0, sizeof(tio->info));
+        init_kthread_work(&tio->work, map_tio_request);
 
         clone = __clone_rq(rq, md, tio, GFP_ATOMIC);
         if (!clone) {
@@ -1833,7 +1841,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
         int r, requeued = 0;
         struct dm_rq_target_io *tio = clone->end_io_data;
 
-        tio->ti = ti;
         r = ti->type->map_rq(ti, clone, &tio->info);
         switch (r) {
         case DM_MAPIO_SUBMITTED:
@@ -1864,6 +1871,13 @@ static int map_request(struct dm_target *ti, struct request *clone,
         return requeued;
 }
 
+static void map_tio_request(struct kthread_work *work)
+{
+        struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+
+        map_request(tio->ti, tio->clone, tio->md);
+}
+
 static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
 {
         struct request *clone;
@@ -1895,6 +1909,7 @@ static void dm_request_fn(struct request_queue *q)
         struct dm_table *map = dm_get_live_table(md, &srcu_idx);
         struct dm_target *ti;
         struct request *rq, *clone;
+        struct dm_rq_target_io *tio;
         sector_t pos;
 
         /*
@@ -1930,20 +1945,15 @@ static void dm_request_fn(struct request_queue *q)
 
                 clone = dm_start_request(md, rq);
 
-                spin_unlock(q->queue_lock);
-                if (map_request(ti, clone, md))
-                        goto requeued;
-
+                tio = rq->special;
+                /* Establish tio->ti before queuing work (map_tio_request) */
+                tio->ti = ti;
+                queue_kthread_work(&md->kworker, &tio->work);
                 BUG_ON(!irqs_disabled());
-                spin_lock(q->queue_lock);
         }
 
         goto out;
 
-requeued:
-        BUG_ON(!irqs_disabled());
-        spin_lock(q->queue_lock);
-
 delay_and_out:
         blk_delay_queue(q, HZ / 10);
 out:
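The dm_request_fn() change above is the core of the patch: the request function still runs under q->queue_lock with interrupts disabled, so it only records the target in tio->ti and queues tio->work; the actual map_request() call happens later in map_tio_request() on the per-device kdmwork thread. A simplified sketch of the handoff, paraphrased from the diff rather than quoted verbatim:

	/* dm_request_fn(): atomic context, q->queue_lock held, irqs off */
	clone = dm_start_request(md, rq);
	tio = rq->special;
	tio->ti = ti;				/* target must be set before queuing */
	queue_kthread_work(&md->kworker, &tio->work);	/* wake kdmwork-<dev> */

	/* map_tio_request(): runs later on the kdmwork thread, irqs enabled */
	tio = container_of(work, struct dm_rq_target_io, work);
	map_request(tio->ti, tio->clone, tio->md);	/* may now use sleeping APIs */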
@@ -2129,6 +2139,7 @@ static struct mapped_device *alloc_dev(int minor)
         INIT_WORK(&md->work, dm_wq_work);
         init_waitqueue_head(&md->eventq);
         init_completion(&md->kobj_holder.completion);
+        md->kworker_task = NULL;
 
         md->disk->major = _major;
         md->disk->first_minor = minor;
@@ -2189,6 +2200,9 @@ static void free_dev(struct mapped_device *md)
         unlock_fs(md);
         bdput(md->bdev);
         destroy_workqueue(md->wq);
+
+        if (md->kworker_task)
+                kthread_stop(md->kworker_task);
         if (md->io_pool)
                 mempool_destroy(md->io_pool);
         if (md->rq_pool)
@@ -2484,6 +2498,11 @@ static int dm_init_request_based_queue(struct mapped_device *md)
         blk_queue_prep_rq(md->queue, dm_prep_fn);
         blk_queue_lld_busy(md->queue, dm_lld_busy);
 
+        /* Also initialize the request-based DM worker thread */
+        init_kthread_worker(&md->kworker);
+        md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+                                       "kdmwork-%s", dm_device_name(md));
+
         elv_register_queue(md->queue);
 
         return 1;
@@ -2574,6 +2593,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
         set_bit(DMF_FREEING, &md->flags);
         spin_unlock(&_minor_lock);
 
+        if (dm_request_based(md))
+                flush_kthread_worker(&md->kworker);
+
         if (!dm_suspended_md(md)) {
                 dm_table_presuspend_targets(map);
                 dm_table_postsuspend_targets(map);
@@ -2817,8 +2839,10 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
          * Stop md->queue before flushing md->wq in case request-based
          * dm defers requests to md->wq from md->queue.
          */
-        if (dm_request_based(md))
+        if (dm_request_based(md)) {
                 stop_queue(md->queue);
+                flush_kthread_worker(&md->kworker);
+        }
 
         flush_workqueue(md->wq);
 