io_uring: save atomic dec for inline executed reqs
When a request is completed with comp_state, its completion reference put is deferred to io_submit_flush_completions(), but the submission reference is put not far from there, so do both puts together to save one atomic dec per request. That targets requests that complete inline, e.g. buffered rw, send/recv. Proper benchmarking hasn't been conducted, but for nops (batch=32) it was around 7901 vs 8117 KIOPS (~2.7%), or ~4% per perf profiling. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
9affd664f0
commit
e342c807f5
|
@ -629,6 +629,7 @@ enum {
|
||||||
REQ_F_NO_FILE_TABLE_BIT,
|
REQ_F_NO_FILE_TABLE_BIT,
|
||||||
REQ_F_WORK_INITIALIZED_BIT,
|
REQ_F_WORK_INITIALIZED_BIT,
|
||||||
REQ_F_LTIMEOUT_ACTIVE_BIT,
|
REQ_F_LTIMEOUT_ACTIVE_BIT,
|
||||||
|
REQ_F_COMPLETE_INLINE_BIT,
|
||||||
|
|
||||||
/* not a real bit, just to check we're not overflowing the space */
|
/* not a real bit, just to check we're not overflowing the space */
|
||||||
__REQ_F_LAST_BIT,
|
__REQ_F_LAST_BIT,
|
||||||
|
@ -672,6 +673,8 @@ enum {
|
||||||
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
|
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
|
||||||
/* linked timeout is active, i.e. prepared by link's head */
|
/* linked timeout is active, i.e. prepared by link's head */
|
||||||
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
|
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
|
||||||
|
/* completion is deferred through io_comp_state */
|
||||||
|
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
|
||||||
};
|
};
|
||||||
|
|
||||||
struct async_poll {
|
struct async_poll {
|
||||||
|
@ -1917,14 +1920,15 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
|
||||||
* io_free_req() doesn't care about completion_lock unless one
|
* io_free_req() doesn't care about completion_lock unless one
|
||||||
* of these flags is set. REQ_F_WORK_INITIALIZED is in the list
|
* of these flags is set. REQ_F_WORK_INITIALIZED is in the list
|
||||||
* because of a potential deadlock with req->work.fs->lock
|
* because of a potential deadlock with req->work.fs->lock
|
||||||
|
* We defer both, completion and submission refs.
|
||||||
*/
|
*/
|
||||||
if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
|
if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
|
||||||
|REQ_F_WORK_INITIALIZED)) {
|
|REQ_F_WORK_INITIALIZED)) {
|
||||||
spin_unlock_irq(&ctx->completion_lock);
|
spin_unlock_irq(&ctx->completion_lock);
|
||||||
io_put_req(req);
|
io_double_put_req(req);
|
||||||
spin_lock_irq(&ctx->completion_lock);
|
spin_lock_irq(&ctx->completion_lock);
|
||||||
} else {
|
} else {
|
||||||
io_put_req(req);
|
io_double_put_req(req);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
io_commit_cqring(ctx);
|
io_commit_cqring(ctx);
|
||||||
|
@ -1940,8 +1944,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res,
|
||||||
io_clean_op(req);
|
io_clean_op(req);
|
||||||
req->result = res;
|
req->result = res;
|
||||||
req->compl.cflags = cflags;
|
req->compl.cflags = cflags;
|
||||||
list_add_tail(&req->compl.list, &cs->list);
|
req->flags |= REQ_F_COMPLETE_INLINE;
|
||||||
cs->nr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __io_req_complete(struct io_kiocb *req, long res,
|
static inline void __io_req_complete(struct io_kiocb *req, long res,
|
||||||
|
@ -6576,9 +6579,9 @@ again:
|
||||||
io_queue_linked_timeout(linked_timeout);
|
io_queue_linked_timeout(linked_timeout);
|
||||||
} else if (likely(!ret)) {
|
} else if (likely(!ret)) {
|
||||||
/* drop submission reference */
|
/* drop submission reference */
|
||||||
if (cs) {
|
if (req->flags & REQ_F_COMPLETE_INLINE) {
|
||||||
io_put_req(req);
|
list_add_tail(&req->compl.list, &cs->list);
|
||||||
if (cs->nr >= 32)
|
if (++cs->nr >= 32)
|
||||||
io_submit_flush_completions(cs);
|
io_submit_flush_completions(cs);
|
||||||
req = NULL;
|
req = NULL;
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue