io_uring: batch account ->req_issue and task struct references

Identical to how we handle the ctx reference counts, increase them by the
batch we're expecting to submit, and handle any slow path residual. The
request alloc-and-issue path is very hot, and this makes a noticeable
difference by avoiding two atomic incs for each individual request.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Jens Axboe 2020-10-07 12:48:53 -06:00
parent 87c4311fd2
commit faf7b51c06
1 changed file with 5 additions and 2 deletions

@@ -6429,8 +6429,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	/* one is dropped after submission, the other at completion */
 	refcount_set(&req->refs, 2);
 	req->task = current;
-	get_task_struct(req->task);
-	atomic_long_inc(&req->task->io_uring->req_issue);
 	req->result = 0;
 
 	if (unlikely(req->opcode >= IORING_OP_LAST))
@@ -6488,6 +6486,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
+	atomic_long_add(nr, &current->io_uring->req_issue);
+	refcount_add(nr, &current->usage);
+
 	io_submit_state_start(&state, ctx, nr);
 
 	for (i = 0; i < nr; i++) {
@@ -6530,6 +6531,8 @@ fail_req:
 	if (unlikely(submitted != nr)) {
 		int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
 		percpu_ref_put_many(&ctx->refs, nr - ref_used);
+		atomic_long_sub(nr - ref_used, &current->io_uring->req_issue);
+		put_task_struct_many(current, nr - ref_used);
 	}
 	if (link)
 		io_queue_link_head(link, &state.comp);
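
For readers of this log, the batch-then-residual accounting the commit
message describes boils down to a small pattern: take one atomic reference
bump for the whole expected batch up front, then give back only the
unconsumed tail if submission stops early. Below is a minimal userspace
sketch of that pattern in C11 (all names here are hypothetical
illustrations, not the kernel API):

	/*
	 * Minimal sketch of batch-then-residual reference accounting.
	 * Build: cc -std=c11 -o batch_refs batch_refs.c
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_long refs;	/* stands in for the task/ctx refcounts */

	static int issue_one(int i)
	{
		/* pretend every request past the fifth fails, to force
		 * the residual path */
		return i >= 5 ? -1 : 0;
	}

	static int submit_batch(int nr)
	{
		int submitted = 0, i;

		/* hot path: one atomic add covers the whole expected
		 * batch, instead of one atomic inc per request */
		atomic_fetch_add(&refs, nr);

		for (i = 0; i < nr; i++) {
			if (issue_one(i))
				break;
			submitted++;
		}

		/* slow path: drop the references the unsubmitted
		 * residual never consumed */
		if (submitted != nr)
			atomic_fetch_sub(&refs, nr - submitted);

		return submitted;
	}

	int main(void)
	{
		int done = submit_batch(8);

		printf("submitted %d of 8, refs held: %ld\n", done,
		       atomic_load(&refs));
		return 0;
	}

In the kernel hunks above, the same residual handling is keyed off
ref_used: a wholesale -EAGAIN returns all nr references, while a partial
submission returns only the nr - ref_used tail; the sketch's final
subtract plays that role.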