io_uring-5.15-2021-09-17
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmFEigwQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgppRPD/98YkvMEutb/aU8qp3gkJDhoshUEhT/Ljq/ 5g574C4E8+k2ahxRuHnRu1w4WgYOuirAF2KNTitwFHW8HzFNbPpaufxNopLqUJOO 6+Ep4gzpEQQ3fcxyyFuaad0CIXqDtmA+/0Bg7euXcYcdXSSN2AYTDgXY9YuIzist ZMs7vdcF9vzGUVo+Eq/2jNANKqN+i1ECeKTcdNcgR5aDxz7p3QE1PEOl1DGvTvzH g7Gi1Ah7PIYRueuXFsG/4DoHIAZ6IIiirZlKjw4979Q6GvTawQjfm3XGhu7Xjzi4 4vy68fTKm0YKg2WfBCuQZsQODgvdlgwBZiYNA6HOK7ngtH47MxCh5gbd48Zcl7MX xlrnw8N50PG2ULLpfz6FQRMj4rMeo6onY9F3IHggMHQVmRUKRCr9Y4EFWMjQD8C/ 9bXhhMec7opsQ97gGSfGN7aWjFus4dXbyuMBlZB4sH7BFofJbchKIVSefzh6cbHP I9R1+cUtRgmDpg34CnlHcAnKHQOWd758Ez6lJtuR3Mukn0Km+qKE5eVNGP3ZKWW1 gnqhJ03K/MHLdqzg5aNML4k4bcEMKZgvXahtED9CGBadBQE68+sP6L1J5A0NObad Pn3MxpT32YKKtPjjGVEzaTpf9BZ9yQxmzUD2tOt8Ik+6Vn4AX3FjcQcQc80xK7wr bwUFwmqfvw== =4Ipo -----END PGP SIGNATURE----- Merge tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block Pull io_uring fixes from Jens Axboe: "Mostly fixes for regressions in this cycle, but also a few fixes that predate this release. The odd one out is a tweak to the direct files added in this release, where attempting to reuse a slot is allowed instead of needing an explicit removal of that slot first. It's a considerable improvement in usability to that API, hence I'm sending it for -rc2. 
 - io-wq race fix and cleanup (Hao)
 - loop_rw_iter() type fix
 - SQPOLL max worker race fix
 - Allow poll arm for O_NONBLOCK files, fixing a case where it's impossible to properly use io_uring if you cannot modify the file flags
 - Allow direct open to simply reuse a slot, instead of needing it explicitly removed first (Pavel)
 - Fix a case where we missed signal mask restoring in cqring_wait, if we hit -EFAULT (Xiaoguang)"

* tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block:
  io_uring: allow retry for O_NONBLOCK if async is supported
  io_uring: auto-removal for direct open/accept
  io_uring: fix missing sigmask restore in io_cqring_wait()
  io_uring: pin SQPOLL data before unlocking ring lock
  io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS arg items
  io-wq: fix potential race of acct->nr_workers
  io-wq: code clean of io_wqe_create_worker()
  io_uring: ensure symmetry in handling iter types in loop_rw_iter()
This commit is contained in:
commit
0bc7eb03cb
27
fs/io-wq.c
27
fs/io-wq.c
|
@ -14,6 +14,7 @@
|
||||||
#include <linux/rculist_nulls.h>
|
#include <linux/rculist_nulls.h>
|
||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
#include <linux/tracehook.h>
|
#include <linux/tracehook.h>
|
||||||
|
#include <uapi/linux/io_uring.h>
|
||||||
|
|
||||||
#include "io-wq.h"
|
#include "io-wq.h"
|
||||||
|
|
||||||
|
@ -176,7 +177,6 @@ static void io_worker_ref_put(struct io_wq *wq)
|
||||||
static void io_worker_exit(struct io_worker *worker)
|
static void io_worker_exit(struct io_worker *worker)
|
||||||
{
|
{
|
||||||
struct io_wqe *wqe = worker->wqe;
|
struct io_wqe *wqe = worker->wqe;
|
||||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
|
||||||
|
|
||||||
if (refcount_dec_and_test(&worker->ref))
|
if (refcount_dec_and_test(&worker->ref))
|
||||||
complete(&worker->ref_done);
|
complete(&worker->ref_done);
|
||||||
|
@ -186,7 +186,6 @@ static void io_worker_exit(struct io_worker *worker)
|
||||||
if (worker->flags & IO_WORKER_F_FREE)
|
if (worker->flags & IO_WORKER_F_FREE)
|
||||||
hlist_nulls_del_rcu(&worker->nulls_node);
|
hlist_nulls_del_rcu(&worker->nulls_node);
|
||||||
list_del_rcu(&worker->all_list);
|
list_del_rcu(&worker->all_list);
|
||||||
acct->nr_workers--;
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
io_wqe_dec_running(worker);
|
io_wqe_dec_running(worker);
|
||||||
worker->flags = 0;
|
worker->flags = 0;
|
||||||
|
@ -246,8 +245,6 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
|
||||||
*/
|
*/
|
||||||
static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
||||||
{
|
{
|
||||||
bool do_create = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Most likely an attempt to queue unbounded work on an io_wq that
|
* Most likely an attempt to queue unbounded work on an io_wq that
|
||||||
* wasn't setup with any unbounded workers.
|
* wasn't setup with any unbounded workers.
|
||||||
|
@ -256,18 +253,15 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
||||||
pr_warn_once("io-wq is not configured for unbound workers");
|
pr_warn_once("io-wq is not configured for unbound workers");
|
||||||
|
|
||||||
raw_spin_lock(&wqe->lock);
|
raw_spin_lock(&wqe->lock);
|
||||||
if (acct->nr_workers < acct->max_workers) {
|
if (acct->nr_workers == acct->max_workers) {
|
||||||
acct->nr_workers++;
|
raw_spin_unlock(&wqe->lock);
|
||||||
do_create = true;
|
return true;
|
||||||
}
|
}
|
||||||
|
acct->nr_workers++;
|
||||||
raw_spin_unlock(&wqe->lock);
|
raw_spin_unlock(&wqe->lock);
|
||||||
if (do_create) {
|
atomic_inc(&acct->nr_running);
|
||||||
atomic_inc(&acct->nr_running);
|
atomic_inc(&wqe->wq->worker_refs);
|
||||||
atomic_inc(&wqe->wq->worker_refs);
|
return create_io_worker(wqe->wq, wqe, acct->index);
|
||||||
return create_io_worker(wqe->wq, wqe, acct->index);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void io_wqe_inc_running(struct io_worker *worker)
|
static void io_wqe_inc_running(struct io_worker *worker)
|
||||||
|
@ -574,6 +568,7 @@ loop:
|
||||||
}
|
}
|
||||||
/* timed out, exit unless we're the last worker */
|
/* timed out, exit unless we're the last worker */
|
||||||
if (last_timeout && acct->nr_workers > 1) {
|
if (last_timeout && acct->nr_workers > 1) {
|
||||||
|
acct->nr_workers--;
|
||||||
raw_spin_unlock(&wqe->lock);
|
raw_spin_unlock(&wqe->lock);
|
||||||
__set_current_state(TASK_RUNNING);
|
__set_current_state(TASK_RUNNING);
|
||||||
break;
|
break;
|
||||||
|
@ -1287,6 +1282,10 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
|
||||||
{
|
{
|
||||||
int i, node, prev = 0;
|
int i, node, prev = 0;
|
||||||
|
|
||||||
|
BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND);
|
||||||
|
BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
|
||||||
|
BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
|
||||||
|
|
||||||
for (i = 0; i < 2; i++) {
|
for (i = 0; i < 2; i++) {
|
||||||
if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
|
if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
|
||||||
new_count[i] = task_rlimit(current, RLIMIT_NPROC);
|
new_count[i] = task_rlimit(current, RLIMIT_NPROC);
|
||||||
|
|
105
fs/io_uring.c
105
fs/io_uring.c
|
@ -2843,7 +2843,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
|
||||||
return __io_file_supports_nowait(req->file, rw);
|
return __io_file_supports_nowait(req->file, rw);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||||
|
int rw)
|
||||||
{
|
{
|
||||||
struct io_ring_ctx *ctx = req->ctx;
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
struct kiocb *kiocb = &req->rw.kiocb;
|
struct kiocb *kiocb = &req->rw.kiocb;
|
||||||
|
@ -2865,8 +2866,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
/* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */
|
/*
|
||||||
if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK))
|
* If the file is marked O_NONBLOCK, still allow retry for it if it
|
||||||
|
* supports async. Otherwise it's impossible to use O_NONBLOCK files
|
||||||
|
* reliably. If not, or if IOCB_NOWAIT is set, don't retry.
|
||||||
|
*/
|
||||||
|
if ((kiocb->ki_flags & IOCB_NOWAIT) ||
|
||||||
|
((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
|
||||||
req->flags |= REQ_F_NOWAIT;
|
req->flags |= REQ_F_NOWAIT;
|
||||||
|
|
||||||
ioprio = READ_ONCE(sqe->ioprio);
|
ioprio = READ_ONCE(sqe->ioprio);
|
||||||
|
@ -3263,12 +3269,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
|
||||||
ret = nr;
|
ret = nr;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (!iov_iter_is_bvec(iter)) {
|
||||||
|
iov_iter_advance(iter, nr);
|
||||||
|
} else {
|
||||||
|
req->rw.len -= nr;
|
||||||
|
req->rw.addr += nr;
|
||||||
|
}
|
||||||
ret += nr;
|
ret += nr;
|
||||||
if (nr != iovec.iov_len)
|
if (nr != iovec.iov_len)
|
||||||
break;
|
break;
|
||||||
req->rw.len -= nr;
|
|
||||||
req->rw.addr += nr;
|
|
||||||
iov_iter_advance(iter, nr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -3346,7 +3355,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||||
{
|
{
|
||||||
if (unlikely(!(req->file->f_mode & FMODE_READ)))
|
if (unlikely(!(req->file->f_mode & FMODE_READ)))
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
return io_prep_rw(req, sqe);
|
return io_prep_rw(req, sqe, READ);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3539,7 +3548,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||||
{
|
{
|
||||||
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
|
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
return io_prep_rw(req, sqe);
|
return io_prep_rw(req, sqe, WRITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int io_write(struct io_kiocb *req, unsigned int issue_flags)
|
static int io_write(struct io_kiocb *req, unsigned int issue_flags)
|
||||||
|
@ -7515,6 +7524,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
|
||||||
break;
|
break;
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
|
if (uts) {
|
||||||
|
struct timespec64 ts;
|
||||||
|
|
||||||
|
if (get_timespec64(&ts, uts))
|
||||||
|
return -EFAULT;
|
||||||
|
timeout = timespec64_to_jiffies(&ts);
|
||||||
|
}
|
||||||
|
|
||||||
if (sig) {
|
if (sig) {
|
||||||
#ifdef CONFIG_COMPAT
|
#ifdef CONFIG_COMPAT
|
||||||
if (in_compat_syscall())
|
if (in_compat_syscall())
|
||||||
|
@ -7528,14 +7545,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uts) {
|
|
||||||
struct timespec64 ts;
|
|
||||||
|
|
||||||
if (get_timespec64(&ts, uts))
|
|
||||||
return -EFAULT;
|
|
||||||
timeout = timespec64_to_jiffies(&ts);
|
|
||||||
}
|
|
||||||
|
|
||||||
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
|
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
|
||||||
iowq.wq.private = current;
|
iowq.wq.private = current;
|
||||||
INIT_LIST_HEAD(&iowq.wq.entry);
|
INIT_LIST_HEAD(&iowq.wq.entry);
|
||||||
|
@ -8284,11 +8293,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
|
||||||
|
struct io_rsrc_node *node, void *rsrc)
|
||||||
|
{
|
||||||
|
struct io_rsrc_put *prsrc;
|
||||||
|
|
||||||
|
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
|
||||||
|
if (!prsrc)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
prsrc->tag = *io_get_tag_slot(data, idx);
|
||||||
|
prsrc->rsrc = rsrc;
|
||||||
|
list_add(&prsrc->list, &node->rsrc_list);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
|
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
|
||||||
unsigned int issue_flags, u32 slot_index)
|
unsigned int issue_flags, u32 slot_index)
|
||||||
{
|
{
|
||||||
struct io_ring_ctx *ctx = req->ctx;
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
|
||||||
|
bool needs_switch = false;
|
||||||
struct io_fixed_file *file_slot;
|
struct io_fixed_file *file_slot;
|
||||||
int ret = -EBADF;
|
int ret = -EBADF;
|
||||||
|
|
||||||
|
@ -8304,9 +8329,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
|
||||||
|
|
||||||
slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
|
slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
|
||||||
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
|
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
|
||||||
ret = -EBADF;
|
|
||||||
if (file_slot->file_ptr)
|
if (file_slot->file_ptr) {
|
||||||
goto err;
|
struct file *old_file;
|
||||||
|
|
||||||
|
ret = io_rsrc_node_switch_start(ctx);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
|
||||||
|
ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
|
||||||
|
ctx->rsrc_node, old_file);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
file_slot->file_ptr = 0;
|
||||||
|
needs_switch = true;
|
||||||
|
}
|
||||||
|
|
||||||
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
||||||
io_fixed_file_set(file_slot, file);
|
io_fixed_file_set(file_slot, file);
|
||||||
|
@ -8318,27 +8356,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
err:
|
err:
|
||||||
|
if (needs_switch)
|
||||||
|
io_rsrc_node_switch(ctx, ctx->file_data);
|
||||||
io_ring_submit_unlock(ctx, !force_nonblock);
|
io_ring_submit_unlock(ctx, !force_nonblock);
|
||||||
if (ret)
|
if (ret)
|
||||||
fput(file);
|
fput(file);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
|
|
||||||
struct io_rsrc_node *node, void *rsrc)
|
|
||||||
{
|
|
||||||
struct io_rsrc_put *prsrc;
|
|
||||||
|
|
||||||
prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
|
|
||||||
if (!prsrc)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
prsrc->tag = *io_get_tag_slot(data, idx);
|
|
||||||
prsrc->rsrc = rsrc;
|
|
||||||
list_add(&prsrc->list, &node->rsrc_list);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||||
struct io_uring_rsrc_update2 *up,
|
struct io_uring_rsrc_update2 *up,
|
||||||
unsigned nr_args)
|
unsigned nr_args)
|
||||||
|
@ -10560,10 +10585,12 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
|
||||||
* ordering. Fine to drop uring_lock here, we hold
|
* ordering. Fine to drop uring_lock here, we hold
|
||||||
* a ref to the ctx.
|
* a ref to the ctx.
|
||||||
*/
|
*/
|
||||||
|
refcount_inc(&sqd->refs);
|
||||||
mutex_unlock(&ctx->uring_lock);
|
mutex_unlock(&ctx->uring_lock);
|
||||||
mutex_lock(&sqd->lock);
|
mutex_lock(&sqd->lock);
|
||||||
mutex_lock(&ctx->uring_lock);
|
mutex_lock(&ctx->uring_lock);
|
||||||
tctx = sqd->thread->io_uring;
|
if (sqd->thread)
|
||||||
|
tctx = sqd->thread->io_uring;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tctx = current->io_uring;
|
tctx = current->io_uring;
|
||||||
|
@ -10577,16 +10604,20 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
if (sqd)
|
if (sqd) {
|
||||||
mutex_unlock(&sqd->lock);
|
mutex_unlock(&sqd->lock);
|
||||||
|
io_put_sq_data(sqd);
|
||||||
|
}
|
||||||
|
|
||||||
if (copy_to_user(arg, new_count, sizeof(new_count)))
|
if (copy_to_user(arg, new_count, sizeof(new_count)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
err:
|
err:
|
||||||
if (sqd)
|
if (sqd) {
|
||||||
mutex_unlock(&sqd->lock);
|
mutex_unlock(&sqd->lock);
|
||||||
|
io_put_sq_data(sqd);
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -317,13 +317,19 @@ enum {
|
||||||
IORING_REGISTER_IOWQ_AFF = 17,
|
IORING_REGISTER_IOWQ_AFF = 17,
|
||||||
IORING_UNREGISTER_IOWQ_AFF = 18,
|
IORING_UNREGISTER_IOWQ_AFF = 18,
|
||||||
|
|
||||||
/* set/get max number of workers */
|
/* set/get max number of io-wq workers */
|
||||||
IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
|
IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
|
||||||
|
|
||||||
/* this goes last */
|
/* this goes last */
|
||||||
IORING_REGISTER_LAST
|
IORING_REGISTER_LAST
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* io-wq worker categories */
|
||||||
|
enum {
|
||||||
|
IO_WQ_BOUND,
|
||||||
|
IO_WQ_UNBOUND,
|
||||||
|
};
|
||||||
|
|
||||||
/* deprecated, see struct io_uring_rsrc_update */
|
/* deprecated, see struct io_uring_rsrc_update */
|
||||||
struct io_uring_files_update {
|
struct io_uring_files_update {
|
||||||
__u32 offset;
|
__u32 offset;
|
||||||
|
|
Loading…
Reference in New Issue