io_uring: batch account ->req_issue and task struct references

Identical to how we handle the ctx reference counts, increase them by the
batch we're expecting to submit, and handle any slow path residual. The
request alloc-and-issue path is very hot, and this makes a noticeable
difference by avoiding two atomic incs for each individual request.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Jens Axboe 2020-10-07 12:48:53 -06:00
parent 87c4311fd2
commit faf7b51c06
1 changed file with 5 additions and 2 deletions

@@ -6429,8 +6429,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	/* one is dropped after submission, the other at completion */
 	refcount_set(&req->refs, 2);
 	req->task = current;
-	get_task_struct(req->task);
-	atomic_long_inc(&req->task->io_uring->req_issue);
 	req->result = 0;
 
 	if (unlikely(req->opcode >= IORING_OP_LAST))
@@ -6488,6 +6486,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
+	atomic_long_add(nr, &current->io_uring->req_issue);
+	refcount_add(nr, &current->usage);
+
 	io_submit_state_start(&state, ctx, nr);
 
 	for (i = 0; i < nr; i++) {
@@ -6530,6 +6531,8 @@ fail_req:
 	if (unlikely(submitted != nr)) {
 		int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
 		percpu_ref_put_many(&ctx->refs, nr - ref_used);
+		atomic_long_sub(nr - ref_used, &current->io_uring->req_issue);
+		put_task_struct_many(current, nr - ref_used);
 	}
 	if (link)
 		io_queue_link_head(link, &state.comp);
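
For readers of this log, the batch-then-residual accounting the commit
message describes boils down to a small pattern: take one atomic reference
bump for the whole expected batch up front, then give back only the
unconsumed tail if submission stops early. Below is a minimal userspace
sketch of that pattern in C11 (all names here are hypothetical
illustrations, not the kernel API):

	/*
	 * Minimal sketch of batch-then-residual reference accounting.
	 * Build: cc -std=c11 -o batch_refs batch_refs.c
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_long refs;	/* stands in for the task/ctx refcounts */

	static int issue_one(int i)
	{
		/* pretend every request past the fifth fails, to force
		 * the residual path */
		return i >= 5 ? -1 : 0;
	}

	static int submit_batch(int nr)
	{
		int submitted = 0, i;

		/* hot path: one atomic add covers the whole expected
		 * batch, instead of one atomic inc per request */
		atomic_fetch_add(&refs, nr);

		for (i = 0; i < nr; i++) {
			if (issue_one(i))
				break;
			submitted++;
		}

		/* slow path: drop the references the unsubmitted
		 * residual never consumed */
		if (submitted != nr)
			atomic_fetch_sub(&refs, nr - submitted);

		return submitted;
	}

	int main(void)
	{
		int done = submit_batch(8);

		printf("submitted %d of 8, refs held: %ld\n", done,
		       atomic_load(&refs));
		return 0;
	}

In the kernel hunks above, the same residual handling is keyed off
ref_used: a wholesale -EAGAIN returns all nr references, while a partial
submission returns only the nr - ref_used tail; the sketch's final
subtract plays that role.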