io_uring: fix REQ_F_COMP_LOCKED by killing it
REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is that the flag is set before io_put_req() but not cleared afterwards, so if that wasn't the final reference, the request will be freed with the flag still set from some other context, which may not hold a spinlock. That means possible races with removing linked timeouts and unsynchronised completion (e.g. access to CQ). Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use task_work_add() to move such requests to a fresh context and free them from there, as was done with __io_free_req_finish(). Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
4edf20f999
commit
216578e55a
149
fs/io_uring.c
149
fs/io_uring.c
|
@ -574,7 +574,6 @@ enum {
|
|||
REQ_F_NOWAIT_BIT,
|
||||
REQ_F_LINK_TIMEOUT_BIT,
|
||||
REQ_F_ISREG_BIT,
|
||||
REQ_F_COMP_LOCKED_BIT,
|
||||
REQ_F_NEED_CLEANUP_BIT,
|
||||
REQ_F_POLLED_BIT,
|
||||
REQ_F_BUFFER_SELECTED_BIT,
|
||||
|
@ -613,8 +612,6 @@ enum {
|
|||
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
|
||||
/* regular file */
|
||||
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
|
||||
/* completion under lock */
|
||||
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
|
||||
/* needs cleanup */
|
||||
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
|
||||
/* already went through poll handler */
|
||||
|
@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
|
|||
struct io_comp_state *cs);
|
||||
static void io_cqring_fill_event(struct io_kiocb *req, long res);
|
||||
static void io_put_req(struct io_kiocb *req);
|
||||
static void io_put_req_deferred(struct io_kiocb *req, int nr);
|
||||
static void io_double_put_req(struct io_kiocb *req);
|
||||
static void __io_double_put_req(struct io_kiocb *req);
|
||||
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
|
||||
static void __io_queue_linked_timeout(struct io_kiocb *req);
|
||||
static void io_queue_linked_timeout(struct io_kiocb *req);
|
||||
|
@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req)
|
|||
atomic_set(&req->ctx->cq_timeouts,
|
||||
atomic_read(&req->ctx->cq_timeouts) + 1);
|
||||
list_del_init(&req->timeout.list);
|
||||
req->flags |= REQ_F_COMP_LOCKED;
|
||||
io_cqring_fill_event(req, 0);
|
||||
io_put_req(req);
|
||||
io_put_req_deferred(req, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
|
|||
if (link) {
|
||||
__io_queue_linked_timeout(link);
|
||||
/* drop submission reference */
|
||||
link->flags |= REQ_F_COMP_LOCKED;
|
||||
io_put_req(link);
|
||||
io_put_req_deferred(link, 1);
|
||||
}
|
||||
kfree(de);
|
||||
} while (!list_empty(&ctx->defer_list));
|
||||
|
@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
|
|||
req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
|
||||
list_del(&req->compl.list);
|
||||
__io_cqring_fill_event(req, req->result, req->compl.cflags);
|
||||
if (!(req->flags & REQ_F_LINK_HEAD)) {
|
||||
req->flags |= REQ_F_COMP_LOCKED;
|
||||
io_put_req(req);
|
||||
} else {
|
||||
|
||||
/*
|
||||
* io_free_req() doesn't care about completion_lock unless one
|
||||
* of these flags is set. REQ_F_WORK_INITIALIZED is in the list
|
||||
* because of a potential deadlock with req->work.fs->lock
|
||||
*/
|
||||
if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
|
||||
|REQ_F_WORK_INITIALIZED)) {
|
||||
spin_unlock_irq(&ctx->completion_lock);
|
||||
io_put_req(req);
|
||||
spin_lock_irq(&ctx->completion_lock);
|
||||
} else {
|
||||
io_put_req(req);
|
||||
}
|
||||
}
|
||||
io_commit_cqring(ctx);
|
||||
|
@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req)
|
|||
io_req_clean_work(req);
|
||||
}
|
||||
|
||||
static void __io_free_req_finish(struct io_kiocb *req)
|
||||
static void __io_free_req(struct io_kiocb *req)
|
||||
{
|
||||
struct io_uring_task *tctx = req->task->io_uring;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_uring_task *tctx;
|
||||
struct io_ring_ctx *ctx;
|
||||
|
||||
io_dismantle_req(req);
|
||||
tctx = req->task->io_uring;
|
||||
ctx = req->ctx;
|
||||
|
||||
atomic_long_inc(&tctx->req_complete);
|
||||
if (tctx->in_idle)
|
||||
|
@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req)
|
|||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
|
||||
static void io_req_task_file_table_put(struct callback_head *cb)
|
||||
{
|
||||
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
|
||||
|
||||
io_dismantle_req(req);
|
||||
__io_free_req_finish(req);
|
||||
}
|
||||
|
||||
static void __io_free_req(struct io_kiocb *req)
|
||||
{
|
||||
if (!(req->flags & REQ_F_COMP_LOCKED)) {
|
||||
io_dismantle_req(req);
|
||||
__io_free_req_finish(req);
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
init_task_work(&req->task_work, io_req_task_file_table_put);
|
||||
ret = task_work_add(req->task, &req->task_work, TWA_RESUME);
|
||||
if (unlikely(ret)) {
|
||||
struct task_struct *tsk;
|
||||
|
||||
tsk = io_wq_get_task(req->ctx->io_wq);
|
||||
task_work_add(tsk, &req->task_work, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool io_link_cancel_timeout(struct io_kiocb *req)
|
||||
{
|
||||
struct io_timeout_data *io = req->async_data;
|
||||
|
@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
|
|||
|
||||
ret = hrtimer_try_to_cancel(&io->timer);
|
||||
if (ret != -1) {
|
||||
req->flags |= REQ_F_COMP_LOCKED;
|
||||
io_cqring_fill_event(req, -ECANCELED);
|
||||
io_commit_cqring(ctx);
|
||||
req->flags &= ~REQ_F_LINK_HEAD;
|
||||
io_put_req(req);
|
||||
io_put_req_deferred(req, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
|
|||
static void io_kill_linked_timeout(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned long flags;
|
||||
bool wake_ev;
|
||||
|
||||
if (!(req->flags & REQ_F_COMP_LOCKED)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
wake_ev = __io_kill_linked_timeout(req);
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
} else {
|
||||
wake_ev = __io_kill_linked_timeout(req);
|
||||
}
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
wake_ev = __io_kill_linked_timeout(req);
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
|
||||
if (wake_ev)
|
||||
io_cqring_ev_posted(ctx);
|
||||
|
@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req)
|
|||
trace_io_uring_fail_link(req, link);
|
||||
|
||||
io_cqring_fill_event(link, -ECANCELED);
|
||||
link->flags |= REQ_F_COMP_LOCKED;
|
||||
__io_double_put_req(link);
|
||||
|
||||
/*
|
||||
* It's ok to free under spinlock as they're not linked anymore,
|
||||
* but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
|
||||
* work.fs->lock.
|
||||
*/
|
||||
if (link->flags & REQ_F_WORK_INITIALIZED)
|
||||
io_put_req_deferred(link, 2);
|
||||
else
|
||||
io_double_put_req(link);
|
||||
}
|
||||
|
||||
io_commit_cqring(ctx);
|
||||
io_cqring_ev_posted(ctx);
|
||||
}
|
||||
|
||||
static void io_fail_links(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned long flags;
|
||||
|
||||
if (!(req->flags & REQ_F_COMP_LOCKED)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
__io_fail_links(req);
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
} else {
|
||||
__io_fail_links(req);
|
||||
}
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
__io_fail_links(req);
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
|
||||
io_cqring_ev_posted(ctx);
|
||||
}
|
||||
|
@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req)
|
|||
io_free_req(req);
|
||||
}
|
||||
|
||||
static void io_put_req_deferred_cb(struct callback_head *cb)
|
||||
{
|
||||
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
|
||||
|
||||
io_free_req(req);
|
||||
}
|
||||
|
||||
static void io_free_req_deferred(struct io_kiocb *req)
|
||||
{
|
||||
int ret;
|
||||
|
||||
init_task_work(&req->task_work, io_put_req_deferred_cb);
|
||||
ret = io_req_task_work_add(req, true);
|
||||
if (unlikely(ret)) {
|
||||
struct task_struct *tsk;
|
||||
|
||||
tsk = io_wq_get_task(req->ctx->io_wq);
|
||||
task_work_add(tsk, &req->task_work, 0);
|
||||
wake_up_process(tsk);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
|
||||
{
|
||||
if (refcount_sub_and_test(refs, &req->refs))
|
||||
io_free_req_deferred(req);
|
||||
}
|
||||
|
||||
static struct io_wq_work *io_steal_work(struct io_kiocb *req)
|
||||
{
|
||||
struct io_kiocb *nxt;
|
||||
|
@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
|
|||
return nxt ? &nxt->work : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must only be used if we don't need to care about links, usually from
|
||||
* within the completion handling itself.
|
||||
*/
|
||||
static void __io_double_put_req(struct io_kiocb *req)
|
||||
{
|
||||
/* drop both submit and complete references */
|
||||
if (refcount_sub_and_test(2, &req->refs))
|
||||
__io_free_req(req);
|
||||
}
|
||||
|
||||
static void io_double_put_req(struct io_kiocb *req)
|
||||
{
|
||||
/* drop both submit and complete references */
|
||||
|
@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
|
|||
if (do_complete) {
|
||||
io_cqring_fill_event(req, -ECANCELED);
|
||||
io_commit_cqring(req->ctx);
|
||||
req->flags |= REQ_F_COMP_LOCKED;
|
||||
req_set_fail_links(req);
|
||||
io_put_req(req);
|
||||
io_put_req_deferred(req, 1);
|
||||
}
|
||||
|
||||
return do_complete;
|
||||
|
@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req)
|
|||
list_del_init(&req->timeout.list);
|
||||
|
||||
req_set_fail_links(req);
|
||||
req->flags |= REQ_F_COMP_LOCKED;
|
||||
io_cqring_fill_event(req, -ECANCELED);
|
||||
io_put_req(req);
|
||||
io_put_req_deferred(req, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue