block: Fix a race between request queue removal and the block cgroup controller
Avoid that the following race can occur: blk_cleanup_queue() blkcg_print_blkgs() spin_lock_irq(lock) (1) spin_lock_irq(blkg->q->queue_lock) (2,5) q->queue_lock = &q->__queue_lock (3) spin_unlock_irq(lock) (4) spin_unlock_irq(blkg->q->queue_lock) (6) (1) take driver lock; (2) busy loop for driver lock; (3) override driver lock with internal lock; (4) unlock driver lock; (5) can take driver lock now; (6) but unlock internal lock. This change is safe because only the SCSI core and the NVME core keep a reference on a request queue after having called blk_cleanup_queue(). Neither driver accesses any of the removed data structures between its blk_cleanup_queue() and blk_put_queue() calls. Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com> Cc: Jan Kara <jack@suse.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
498f6650ae
commit
a063057d7c
|
@ -719,6 +719,37 @@ void blk_cleanup_queue(struct request_queue *q)
|
|||
del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
|
||||
blk_sync_queue(q);
|
||||
|
||||
/*
|
||||
* I/O scheduler exit is only safe after the sysfs scheduler attribute
|
||||
* has been removed.
|
||||
*/
|
||||
WARN_ON_ONCE(q->kobj.state_in_sysfs);
|
||||
|
||||
/*
|
||||
* Since the I/O scheduler exit code may access cgroup information,
|
||||
* perform I/O scheduler exit before disassociating from the block
|
||||
* cgroup controller.
|
||||
*/
|
||||
if (q->elevator) {
|
||||
ioc_clear_queue(q);
|
||||
elevator_exit(q, q->elevator);
|
||||
q->elevator = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove all references to @q from the block cgroup controller before
|
||||
* restoring @q->queue_lock to avoid that restoring this pointer causes
|
||||
* e.g. blkcg_print_blkgs() to crash.
|
||||
*/
|
||||
blkcg_exit_queue(q);
|
||||
|
||||
/*
|
||||
* Since the cgroup code may dereference the @q->backing_dev_info
|
||||
* pointer, only decrease its reference count after having removed the
|
||||
* association with the block cgroup controller.
|
||||
*/
|
||||
bdi_put(q->backing_dev_info);
|
||||
|
||||
if (q->mq_ops)
|
||||
blk_mq_free_queue(q);
|
||||
percpu_ref_exit(&q->q_usage_counter);
|
||||
|
|
|
@ -798,13 +798,6 @@ static void __blk_release_queue(struct work_struct *work)
|
|||
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
bdi_put(q->backing_dev_info);
|
||||
blkcg_exit_queue(q);
|
||||
|
||||
if (q->elevator) {
|
||||
ioc_clear_queue(q);
|
||||
elevator_exit(q, q->elevator);
|
||||
}
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
|
||||
|
|
Loading…
Reference in New Issue