blk-mq: init hctx sched after update ctx and hctx mapping
Currently, when updating nr_hw_queues, the IO scheduler's init_hctx is invoked before the mapping between ctx and hctx has been updated correctly by blk_mq_map_swqueue. The IO scheduler's init_hctx (kyber) may depend on this mapping, get a wrong result, and finally panic. A simple way to fix this is to switch the IO scheduler to 'none' before updating nr_hw_queues, and then switch it back after nr_hw_queues has been updated. blk_mq_sched_init_hctx/blk_mq_sched_exit_hctx are removed because nobody uses them any more. Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
fcedba42d9
commit
d48ece209f
|
@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
|
|||
blk_mq_sched_free_tags(set, hctx, i);
|
||||
}
|
||||
|
||||
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
int ret;
|
||||
|
||||
if (!e)
|
||||
return 0;
|
||||
|
||||
ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (e->type->ops.mq.init_hctx) {
|
||||
ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
|
||||
if (ret) {
|
||||
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
blk_mq_debugfs_register_sched_hctx(q, hctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (!e)
|
||||
return;
|
||||
|
||||
blk_mq_debugfs_unregister_sched_hctx(hctx);
|
||||
|
||||
if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
|
||||
e->type->ops.mq.exit_hctx(hctx, hctx_idx);
|
||||
hctx->sched_data = NULL;
|
||||
}
|
||||
|
||||
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
|
||||
}
|
||||
|
||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
|
|
@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
|
|||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
|
||||
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
|
||||
|
||||
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx);
|
||||
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx);
|
||||
|
||||
static inline bool
|
||||
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
|
|
|
@ -2147,8 +2147,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
|
|||
if (set->ops->exit_request)
|
||||
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
|
||||
|
||||
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
|
||||
|
||||
if (set->ops->exit_hctx)
|
||||
set->ops->exit_hctx(hctx, hctx_idx);
|
||||
|
||||
|
@ -2216,12 +2214,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
|
||||
goto free_bitmap;
|
||||
|
||||
if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
|
||||
goto exit_hctx;
|
||||
|
||||
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
|
||||
if (!hctx->fq)
|
||||
goto sched_exit_hctx;
|
||||
goto exit_hctx;
|
||||
|
||||
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
|
||||
goto free_fq;
|
||||
|
@ -2235,8 +2230,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||
|
||||
free_fq:
|
||||
kfree(hctx->fq);
|
||||
sched_exit_hctx:
|
||||
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
|
||||
exit_hctx:
|
||||
if (set->ops->exit_hctx)
|
||||
set->ops->exit_hctx(hctx, hctx_idx);
|
||||
|
@ -2898,10 +2891,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* request_queue and elevator_type pair.
|
||||
* It is just used by __blk_mq_update_nr_hw_queues to cache
|
||||
* the elevator_type associated with a request_queue.
|
||||
*/
|
||||
struct blk_mq_qe_pair {
|
||||
struct list_head node;
|
||||
struct request_queue *q;
|
||||
struct elevator_type *type;
|
||||
};
|
||||
|
||||
/*
|
||||
* Cache the elevator_type in qe pair list and switch the
|
||||
* io scheduler to 'none'
|
||||
*/
|
||||
static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_qe_pair *qe;
|
||||
|
||||
if (!q->elevator)
|
||||
return true;
|
||||
|
||||
qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
|
||||
if (!qe)
|
||||
return false;
|
||||
|
||||
INIT_LIST_HEAD(&qe->node);
|
||||
qe->q = q;
|
||||
qe->type = q->elevator->type;
|
||||
list_add(&qe->node, head);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
/*
|
||||
* After elevator_switch_mq, the previous elevator_queue will be
|
||||
* released by elevator_release. The reference of the io scheduler
|
||||
* module get by elevator_get will also be put. So we need to get
|
||||
* a reference of the io scheduler module here to prevent it to be
|
||||
* removed.
|
||||
*/
|
||||
__module_get(qe->type->elevator_owner);
|
||||
elevator_switch_mq(q, NULL);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void blk_mq_elv_switch_back(struct list_head *head,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_qe_pair *qe;
|
||||
struct elevator_type *t = NULL;
|
||||
|
||||
list_for_each_entry(qe, head, node)
|
||||
if (qe->q == q) {
|
||||
t = qe->type;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!t)
|
||||
return;
|
||||
|
||||
list_del(&qe->node);
|
||||
kfree(qe);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
elevator_switch_mq(q, t);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
}
|
||||
|
||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
int nr_hw_queues)
|
||||
{
|
||||
struct request_queue *q;
|
||||
LIST_HEAD(head);
|
||||
|
||||
lockdep_assert_held(&set->tag_list_lock);
|
||||
|
||||
|
@ -2912,6 +2976,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
blk_mq_freeze_queue(q);
|
||||
/*
|
||||
* Switch IO scheduler to 'none', cleaning up the data associated
|
||||
* with the previous scheduler. We will switch back once we are done
|
||||
* updating the new sw to hw queue mappings.
|
||||
*/
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
if (!blk_mq_elv_switch_none(&head, q))
|
||||
goto switch_back;
|
||||
|
||||
set->nr_hw_queues = nr_hw_queues;
|
||||
blk_mq_update_queue_map(set);
|
||||
|
@ -2920,6 +2992,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||
blk_mq_queue_reinit(q);
|
||||
}
|
||||
|
||||
switch_back:
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
blk_mq_elv_switch_back(&head, q);
|
||||
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
blk_mq_unfreeze_queue(q);
|
||||
}
|
||||
|
|
|
@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
|
|||
|
||||
int elevator_init(struct request_queue *);
|
||||
int elevator_init_mq(struct request_queue *q);
|
||||
int elevator_switch_mq(struct request_queue *q,
|
||||
struct elevator_type *new_e);
|
||||
void elevator_exit(struct request_queue *, struct elevator_queue *);
|
||||
int elv_register_queue(struct request_queue *q);
|
||||
void elv_unregister_queue(struct request_queue *q);
|
||||
|
|
|
@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(elv_unregister);
|
||||
|
||||
static int elevator_switch_mq(struct request_queue *q,
|
||||
int elevator_switch_mq(struct request_queue *q,
|
||||
struct elevator_type *new_e)
|
||||
{
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
||||
if (q->elevator) {
|
||||
if (q->elevator->registered)
|
||||
elv_unregister_queue(q);
|
||||
|
@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
|
|||
blk_add_trace_msg(q, "elv switch: none");
|
||||
|
||||
out:
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
|
|||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
if (q->mq_ops)
|
||||
return elevator_switch_mq(q, new_e);
|
||||
if (q->mq_ops) {
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
||||
err = elevator_switch_mq(q, new_e);
|
||||
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn on BYPASS and drain all requests w/ elevator private data.
|
||||
|
|
Loading…
Reference in New Issue