io_uring-5.6-2020-02-14
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl5GzcYQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgptCNEAC7F8fAOTKDtYxl7vNgoSVd4lUJAllyaYFk
gFaxuh6fQXqXnqQhPQxJxHCTqvREqfdbGItLmWp/g3r9bEYZouKflPoVTSdgeTjz
47JoIXhdcv4A26lGO2N+tAuRDk68dcyG31tHqHNcrHhFL+1hcs8pMMf++KD8i/VF
kooo56E7Wfh4170q5Jr2pRpVv5O2G/WgkKqbmHRfOXLvvBvCNUDYGfK8Gu8glp7T
auMNdRy7+3uia5oBre5zz8oqeSXS8Th6BWUPTJYEE3kl4rnZMnVsRSfCXDszUchP
yl59tDM83WR4Jc/uq6baStL/IAjK6y76O3mKIZPdjoxhL3YIzPHOTxiVzynWsFDh
abbRqMdmst7c7qYIaWA0S6jZE7uN8eR9huuo+EwKdzXQeKJIYBLo447ysHmaglq3
amzEJUBrw/attgpqakQ1nL6aj5D+XhthuDCYKK5/447ygdNUQfES1ZobzHZVMjx1
74ljXJ24Y/QsFfqntf1szf8mh3lxD3KO85OGgxjYj/iv7mtBCPALTLn3xbwNzJ15
Mrb2xY7n6rxO6gCEmJEJAAs2vGQFewS8MaXQu/fOcUWujRP4a5xNvaL4YDZ6epXh
wXHPTrNDxmF948r78EYxF6N7BqR/uRv+razMYyZ/OWVi4CLogwAOoTtfaHhbA04H
Jh2GUgotdA==
=6MrA
-----END PGP SIGNATURE-----

Merge tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Here's a set of fixes for io_uring:

   - Various fixes with cleanups from Pavel, fixing corner cases where
     we're not correctly dealing with iovec cleanup.

   - Clarify that statx/openat/openat2 don't accept fixed files

   - Buffered raw device write EOPNOTSUPP fix

   - Ensure async workers grab current->fs

   - A few task exit fixes with pending requests that grab the file table

   - send/recvmsg async load fix

   - io-wq offline node setup fix

   - CQ overflow flush in poll"

* tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: prune request from overflow list on flush
  io-wq: don't call kXalloc_node() with non-online node
  io_uring: retain sockaddr_storage across send/recvmsg async punt
  io_uring: cancel pending async work if task exits
  io-wq: add io_wq_cancel_pid() to cancel based on a specific pid
  io-wq: make io_wqe_cancel_work() take a match handler
  io_uring: fix openat/statx's filename leak
  io_uring: fix double prep iovec leak
  io_uring: fix async close() with f_op->flush()
  io_uring: allow AT_FDCWD for non-file openat/openat2/statx
  io_uring: grab ->fs as part of async preparation
  io-wq: add support for inheriting ->fs
  io_uring: retry raw bdev writes if we hit -EOPNOTSUPP
  io_uring: add cleanup for openat()/statx()
  io_uring: fix iovec leaks
  io_uring: remove unused struct io_async_open
  io_uring: flush overflowed CQ events in the io_uring_poll()
  io_uring: statx/openat/openat2 don't support fixed files
  io_uring: fix deferred req iovec leak
  io_uring: fix 1-bit bitfields to be unsigned
  ...
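To make the statx/openat behaviour described above concrete from userspace, here is a minimal sketch, assuming liburing with openat support is available; the file name and error handling are illustrative only and not part of this series:

/*
 * Illustrative only: submit IORING_OP_OPENAT through io_uring, using
 * AT_FDCWD as the directory fd (now accepted for non-file opcodes) and
 * a plain fd rather than a fixed file (fixed files return -EBADF here).
 * Build with something like: gcc open.c -luring
 */
#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	/* dfd = AT_FDCWD: resolve "data.txt" relative to the current directory */
	io_uring_prep_openat(sqe, AT_FDCWD, "data.txt", O_RDONLY, 0);
	/* Note: do NOT set IOSQE_FIXED_FILE on openat/openat2/statx requests */

	io_uring_submit(&ring);
	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		/* cqe->res is the new fd on success, -errno on failure */
		printf("openat returned %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}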
This commit is contained in:
commit ca60ad6a6b

fs/io-wq.c (92 lines changed)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/rculist_nulls.h>
+#include <linux/fs_struct.h>
 
 #include "io-wq.h"
 
@@ -59,6 +60,7 @@ struct io_worker {
 	const struct cred *cur_creds;
 	const struct cred *saved_creds;
 	struct files_struct *restore_files;
+	struct fs_struct *restore_fs;
 };
 
 #if BITS_PER_LONG == 64
@@ -151,6 +153,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
 		task_unlock(current);
 	}
 
+	if (current->fs != worker->restore_fs)
+		current->fs = worker->restore_fs;
+
 	/*
 	 * If we have an active mm, we need to drop the wq lock before unusing
 	 * it. If we do, return true and let the caller retry the idle loop.
@@ -311,6 +316,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
 
 	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
 	worker->restore_files = current->files;
+	worker->restore_fs = current->fs;
 	io_wqe_inc_running(wqe, worker);
 }
 
@@ -481,6 +487,8 @@ next:
 			current->files = work->files;
 			task_unlock(current);
 		}
+		if (work->fs && current->fs != work->fs)
+			current->fs = work->fs;
 		if (work->mm != worker->mm)
 			io_wq_switch_mm(worker, work);
 		if (worker->cur_creds != work->creds)
@@ -691,11 +699,16 @@ static int io_wq_manager(void *data)
 	/* create fixed workers */
 	refcount_set(&wq->refs, workers_to_create);
 	for_each_node(node) {
+		if (!node_online(node))
+			continue;
 		if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
 			goto err;
 		workers_to_create--;
 	}
 
+	while (workers_to_create--)
+		refcount_dec(&wq->refs);
+
 	complete(&wq->done);
 
 	while (!kthread_should_stop()) {
@@ -703,6 +716,9 @@ static int io_wq_manager(void *data)
 			struct io_wqe *wqe = wq->wqes[node];
 			bool fork_worker[2] = { false, false };
 
+			if (!node_online(node))
+				continue;
+
 			spin_lock_irq(&wqe->lock);
 			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
 				fork_worker[IO_WQ_ACCT_BOUND] = true;
@@ -821,7 +837,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 	list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
 		if (io_worker_get(worker)) {
-			ret = func(worker, data);
+			/* no task if node is/was offline */
+			if (worker->task)
+				ret = func(worker, data);
 			io_worker_release(worker);
 			if (ret)
 				break;
@@ -929,17 +947,19 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	return ret;
 }
 
+struct work_match {
+	bool (*fn)(struct io_wq_work *, void *data);
+	void *data;
+};
+
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
-	struct io_wq_work *work = data;
+	struct work_match *match = data;
 	unsigned long flags;
 	bool ret = false;
 
-	if (worker->cur_work != work)
-		return false;
-
 	spin_lock_irqsave(&worker->lock, flags);
-	if (worker->cur_work == work &&
+	if (match->fn(worker->cur_work, match->data) &&
 	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
 		send_sig(SIGINT, worker->task, 1);
 		ret = true;
@@ -950,15 +970,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 }
 
 static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
-					    struct io_wq_work *cwork)
+					    struct work_match *match)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
 	unsigned long flags;
 	bool found = false;
 
-	cwork->flags |= IO_WQ_WORK_CANCEL;
-
 	/*
 	 * First check pending list, if we're lucky we can just remove it
 	 * from there. CANCEL_OK means that the work is returned as-new,
@@ -968,7 +986,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	wq_list_for_each(node, prev, &wqe->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 
-		if (work == cwork) {
+		if (match->fn(work, match->data)) {
 			wq_node_del(&wqe->work_list, node, prev);
 			found = true;
 			break;
@@ -989,20 +1007,60 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	 * completion will run normally in this case.
 	 */
 	rcu_read_lock();
-	found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, cwork);
+	found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
 	rcu_read_unlock();
 	return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
 }
 
+static bool io_wq_work_match(struct io_wq_work *work, void *data)
+{
+	return work == data;
+}
+
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
 {
+	struct work_match match = {
+		.fn	= io_wq_work_match,
+		.data	= cwork
+	};
+	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
+	int node;
+
+	cwork->flags |= IO_WQ_WORK_CANCEL;
+
+	for_each_node(node) {
+		struct io_wqe *wqe = wq->wqes[node];
+
+		ret = io_wqe_cancel_work(wqe, &match);
+		if (ret != IO_WQ_CANCEL_NOTFOUND)
+			break;
+	}
+
+	return ret;
+}
+
+static bool io_wq_pid_match(struct io_wq_work *work, void *data)
+{
+	pid_t pid = (pid_t) (unsigned long) data;
+
+	if (work)
+		return work->task_pid == pid;
+	return false;
+}
+
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
+{
+	struct work_match match = {
+		.fn	= io_wq_pid_match,
+		.data	= (void *) (unsigned long) pid
+	};
 	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
 	int node;
 
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
-		ret = io_wqe_cancel_work(wqe, cwork);
+		ret = io_wqe_cancel_work(wqe, &match);
 		if (ret != IO_WQ_CANCEL_NOTFOUND)
			break;
 	}
 
@@ -1036,6 +1094,8 @@ void io_wq_flush(struct io_wq *wq)
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
+		if (!node_online(node))
+			continue;
 		init_completion(&data.done);
 		INIT_IO_WORK(&data.work, io_wq_flush_func);
 		data.work.flags |= IO_WQ_WORK_INTERNAL;
@@ -1067,12 +1127,15 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 
 	for_each_node(node) {
 		struct io_wqe *wqe;
+		int alloc_node = node;
 
-		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node);
+		if (!node_online(alloc_node))
+			alloc_node = NUMA_NO_NODE;
+		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
 		if (!wqe)
 			goto err;
 		wq->wqes[node] = wqe;
-		wqe->node = node;
+		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
 		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
 		if (wq->user) {
@@ -1080,7 +1143,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 					task_rlimit(current, RLIMIT_NPROC);
 		}
 		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
-		wqe->node = node;
 		wqe->wq = wq;
 		spin_lock_init(&wqe->lock);
 		INIT_WQ_LIST(&wqe->work_list);
fs/io-wq.h

@@ -74,17 +74,20 @@ struct io_wq_work {
 	struct files_struct *files;
 	struct mm_struct *mm;
 	const struct cred *creds;
+	struct fs_struct *fs;
 	unsigned flags;
+	pid_t task_pid;
 };
 
 #define INIT_IO_WORK(work, _func)		\
 	do {					\
 		(work)->list.next = NULL;	\
 		(work)->func = _func;		\
-		(work)->flags = 0;		\
 		(work)->files = NULL;		\
 		(work)->mm = NULL;		\
 		(work)->creds = NULL;		\
+		(work)->fs = NULL;		\
+		(work)->flags = 0;		\
 	} while (0)				\
 
 typedef void (get_work_fn)(struct io_wq_work *);
@@ -107,6 +110,7 @@ void io_wq_flush(struct io_wq *wq);
 
 void io_wq_cancel_all(struct io_wq *wq);
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid);
 
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
 
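The io-wq hunks above replace "cancel by work pointer" with a small matcher object (a predicate callback plus opaque data), so cancel-by-work and the new cancel-by-pid can share io_wqe_cancel_work(). A standalone sketch of that pattern, outside the kernel; the names and data here are illustrative, not the kernel's:

/* Illustrative matcher pattern: one cancel routine, swappable match policy. */
#include <stdbool.h>
#include <stdio.h>

struct work_item {
	int id;
	int owner_pid;
};

struct match {
	bool (*fn)(const struct work_item *, void *data);
	void *data;
};

static bool match_by_ptr(const struct work_item *w, void *data)
{
	return w == data;			/* cancel one specific item */
}

static bool match_by_pid(const struct work_item *w, void *data)
{
	return w->owner_pid == (int)(long)data;	/* cancel everything a task queued */
}

/* The cancel routine only sees the matcher, never the policy behind it. */
static int cancel_matching(struct work_item *items, int n, struct match *m)
{
	int cancelled = 0;

	for (int i = 0; i < n; i++) {
		if (m->fn(&items[i], m->data)) {
			items[i].id = -1;	/* pretend-cancel */
			cancelled++;
		}
	}
	return cancelled;
}

int main(void)
{
	struct work_item items[] = { {1, 100}, {2, 200}, {3, 100} };
	struct match by_pid = { .fn = match_by_pid, .data = (void *)(long)100 };
	struct match by_ptr = { .fn = match_by_ptr, .data = &items[1] };

	printf("cancelled %d items for pid 100\n", cancel_matching(items, 3, &by_pid));
	printf("cancelled %d items by pointer\n", cancel_matching(items, 3, &by_ptr));
	return 0;
}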
fs/io_uring.c (299 lines changed)
@@ -75,6 +75,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fadvise.h>
 #include <linux/eventpoll.h>
+#include <linux/fs_struct.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -204,11 +205,11 @@ struct io_ring_ctx {
 
 	struct {
 		unsigned int		flags;
-		int			compat: 1;
-		int			account_mem: 1;
-		int			cq_overflow_flushed: 1;
-		int			drain_next: 1;
-		int			eventfd_async: 1;
+		unsigned int		compat: 1;
+		unsigned int		account_mem: 1;
+		unsigned int		cq_overflow_flushed: 1;
+		unsigned int		drain_next: 1;
+		unsigned int		eventfd_async: 1;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -441,6 +442,7 @@ struct io_async_msghdr {
 	struct iovec			*iov;
 	struct sockaddr __user		*uaddr;
 	struct msghdr			msg;
+	struct sockaddr_storage		addr;
 };
 
 struct io_async_rw {
@@ -450,17 +452,12 @@ struct io_async_rw {
 	ssize_t				size;
 };
 
-struct io_async_open {
-	struct filename			*filename;
-};
-
 struct io_async_ctx {
 	union {
 		struct io_async_rw	rw;
 		struct io_async_msghdr	msg;
 		struct io_async_connect	connect;
 		struct io_timeout_data	timeout;
-		struct io_async_open	open;
 	};
 };
 
@@ -483,6 +480,8 @@ enum {
 	REQ_F_MUST_PUNT_BIT,
 	REQ_F_TIMEOUT_NOSEQ_BIT,
 	REQ_F_COMP_LOCKED_BIT,
+	REQ_F_NEED_CLEANUP_BIT,
+	REQ_F_OVERFLOW_BIT,
 };
 
 enum {
@@ -521,6 +520,10 @@ enum {
 	REQ_F_TIMEOUT_NOSEQ	= BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
 	/* completion under lock */
 	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
+	/* needs cleanup */
+	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
+	/* in overflow list */
+	REQ_F_OVERFLOW		= BIT(REQ_F_OVERFLOW_BIT),
 };
 
 /*
@@ -553,7 +556,6 @@ struct io_kiocb {
 	 * llist_node is only used for poll deferred completions
 	 */
 	struct llist_node		llist_node;
-	bool				has_user;
 	bool				in_async;
 	bool				needs_fixed_file;
 	u8				opcode;
@@ -614,6 +616,8 @@ struct io_op_def {
 	unsigned		not_supported : 1;
 	/* needs file table */
 	unsigned		file_table : 1;
+	/* needs ->fs */
+	unsigned		needs_fs : 1;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -656,12 +660,14 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_mm		= 1,
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
+		.needs_fs		= 1,
 	},
 	[IORING_OP_RECVMSG] = {
 		.async_ctx		= 1,
 		.needs_mm		= 1,
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
+		.needs_fs		= 1,
 	},
 	[IORING_OP_TIMEOUT] = {
 		.async_ctx		= 1,
@@ -692,6 +698,7 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 		.fd_non_neg		= 1,
 		.file_table		= 1,
+		.needs_fs		= 1,
 	},
 	[IORING_OP_CLOSE] = {
 		.needs_file		= 1,
@@ -705,6 +712,7 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_mm		= 1,
 		.needs_file		= 1,
 		.fd_non_neg		= 1,
+		.needs_fs		= 1,
 	},
 	[IORING_OP_READ] = {
 		.needs_mm		= 1,
@@ -736,6 +744,7 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 		.fd_non_neg		= 1,
 		.file_table		= 1,
+		.needs_fs		= 1,
 	},
 	[IORING_OP_EPOLL_CTL] = {
 		.unbound_nonreg_file	= 1,
@@ -754,6 +763,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 unsigned nr_args);
 static int io_grab_files(struct io_kiocb *req);
 static void io_ring_file_ref_flush(struct fixed_file_data *data);
+static void io_cleanup_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
 
@@ -909,6 +919,18 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 	}
 	if (!req->work.creds)
 		req->work.creds = get_current_cred();
+	if (!req->work.fs && def->needs_fs) {
+		spin_lock(&current->fs->lock);
+		if (!current->fs->in_exec) {
+			req->work.fs = current->fs;
+			req->work.fs->users++;
+		} else {
+			req->work.flags |= IO_WQ_WORK_CANCEL;
+		}
+		spin_unlock(&current->fs->lock);
+	}
+	if (!req->work.task_pid)
+		req->work.task_pid = task_pid_vnr(current);
 }
 
 static inline void io_req_work_drop_env(struct io_kiocb *req)
@@ -921,6 +943,16 @@ static inline void io_req_work_drop_env(struct io_kiocb *req)
 		put_cred(req->work.creds);
 		req->work.creds = NULL;
 	}
+	if (req->work.fs) {
+		struct fs_struct *fs = req->work.fs;
+
+		spin_lock(&req->work.fs->lock);
+		if (--fs->users)
+			fs = NULL;
+		spin_unlock(&req->work.fs->lock);
+		if (fs)
+			free_fs_struct(fs);
+	}
 }
 
 static inline bool io_prep_async_work(struct io_kiocb *req,
@@ -1074,6 +1106,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
 						list);
 		list_move(&req->list, &list);
+		req->flags &= ~REQ_F_OVERFLOW;
 		if (cqe) {
 			WRITE_ONCE(cqe->user_data, req->user_data);
 			WRITE_ONCE(cqe->res, req->result);
@@ -1126,6 +1159,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 			set_bit(0, &ctx->sq_check_overflow);
 			set_bit(0, &ctx->cq_check_overflow);
 		}
+		req->flags |= REQ_F_OVERFLOW;
 		refcount_inc(&req->refs);
 		req->result = res;
 		list_add_tail(&req->list, &ctx->cq_overflow_list);
@@ -1241,6 +1275,9 @@ static void __io_free_req(struct io_kiocb *req)
 {
 	__io_req_aux_free(req);
 
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		io_cleanup_req(req);
+
 	if (req->flags & REQ_F_INFLIGHT) {
 		struct io_ring_ctx *ctx = req->ctx;
 		unsigned long flags;
@@ -2056,9 +2093,6 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 		return iorw->size;
 	}
 
-	if (!req->has_user)
-		return -EFAULT;
-
 #ifdef CONFIG_COMPAT
 	if (req->ctx->compat)
 		return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
@@ -2137,6 +2171,8 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
 		req->io->rw.iov = req->io->rw.fast_iov;
 		memcpy(req->io->rw.iov, fast_iov,
 			sizeof(struct iovec) * iter->nr_segs);
+	} else {
+		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 }
 
@@ -2148,17 +2184,6 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 	return req->io == NULL;
 }
 
-static void io_rw_async(struct io_wq_work **workptr)
-{
-	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-	struct iovec *iov = NULL;
-
-	if (req->io->rw.iov != req->io->rw.fast_iov)
-		iov = req->io->rw.iov;
-	io_wq_submit_work(workptr);
-	kfree(iov);
-}
-
 static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 			     struct iovec *iovec, struct iovec *fast_iov,
 			     struct iov_iter *iter)
@@ -2171,7 +2196,6 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 
 		io_req_map_rw(req, io_size, iovec, fast_iov, iter);
 	}
-	req->work.func = io_rw_async;
 	return 0;
 }
 
@@ -2189,7 +2213,8 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (unlikely(!(req->file->f_mode & FMODE_READ)))
 		return -EBADF;
 
-	if (!req->io)
+	/* either don't need iovec imported or already have it */
+	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
 
 	io = req->io;
@@ -2258,8 +2283,8 @@ copy_iov:
 		}
 	}
 out_free:
-	if (!io_wq_current_is_worker())
-		kfree(iovec);
+	kfree(iovec);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	return ret;
 }
 
@@ -2277,7 +2302,8 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
 		return -EBADF;
 
-	if (!req->io)
+	/* either don't need iovec imported or already have it */
+	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
 
 	io = req->io;
@@ -2352,6 +2378,12 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 			ret2 = call_write_iter(req->file, kiocb, &iter);
 		else
 			ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+		/*
+		 * Raw bdev writes will -EOPNOTSUPP for IOCB_NOWAIT. Just
+		 * retry them without IOCB_NOWAIT.
+		 */
+		if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
+			ret2 = -EAGAIN;
 		if (!force_nonblock || ret2 != -EAGAIN) {
 			kiocb_done(kiocb, ret2, nxt, req->in_async);
 		} else {
@@ -2364,8 +2396,8 @@ copy_iov:
 		}
 	}
 out_free:
-	if (!io_wq_current_is_worker())
-		kfree(iovec);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	kfree(iovec);
 	return ret;
 }
 
@@ -2534,6 +2566,10 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
+	if (sqe->flags & IOSQE_FIXED_FILE)
+		return -EBADF;
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
 
 	req->open.dfd = READ_ONCE(sqe->fd);
 	req->open.how.mode = READ_ONCE(sqe->len);
@@ -2547,6 +2583,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return ret;
 	}
 
+	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
 
@@ -2559,6 +2596,10 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
+	if (sqe->flags & IOSQE_FIXED_FILE)
+		return -EBADF;
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
 
 	req->open.dfd = READ_ONCE(sqe->fd);
 	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -2583,6 +2624,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return ret;
 	}
 
+	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
 
@@ -2614,6 +2656,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
 	}
 err:
 	putname(req->open.filename);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
@@ -2754,6 +2797,10 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
+	if (sqe->flags & IOSQE_FIXED_FILE)
+		return -EBADF;
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
 
 	req->open.dfd = READ_ONCE(sqe->fd);
 	req->open.mask = READ_ONCE(sqe->len);
@@ -2771,6 +2818,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return ret;
 	}
 
+	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
 
@@ -2808,6 +2856,7 @@ retry:
 		ret = cp_statx(&stat, ctx->buffer);
 err:
 	putname(ctx->filename);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
@@ -2827,7 +2876,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	    sqe->rw_flags || sqe->buf_index)
 		return -EINVAL;
 	if (sqe->flags & IOSQE_FIXED_FILE)
-		return -EINVAL;
+		return -EBADF;
 
 	req->close.fd = READ_ONCE(sqe->fd);
 	if (req->file->f_op == &io_uring_fops ||
@@ -2837,24 +2886,25 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return 0;
 }
 
+/* only called when __close_fd_get_file() is done */
+static void __io_close_finish(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+	int ret;
+
+	ret = filp_close(req->close.put_file, req->work.files);
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	fput(req->close.put_file);
+	io_put_req_find_next(req, nxt);
+}
+
 static void io_close_finish(struct io_wq_work **workptr)
 {
 	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
 	struct io_kiocb *nxt = NULL;
 
-	/* Invoked with files, we need to do the close */
-	if (req->work.files) {
-		int ret;
-
-		ret = filp_close(req->close.put_file, req->work.files);
-		if (ret < 0)
-			req_set_fail_links(req);
-		io_cqring_add_event(req, ret);
-	}
-
-	fput(req->close.put_file);
-
-	io_put_req_find_next(req, &nxt);
+	__io_close_finish(req, &nxt);
 	if (nxt)
 		io_wq_assign_next(workptr, nxt);
 }
@@ -2877,22 +2927,8 @@ static int io_close(struct io_kiocb *req, struct io_kiocb **nxt,
 	 * No ->flush(), safely close from here and just punt the
 	 * fput() to async context.
 	 */
-	ret = filp_close(req->close.put_file, current->files);
-	if (ret < 0)
-		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-
-	if (io_wq_current_is_worker()) {
-		struct io_wq_work *old_work, *work;
-
-		old_work = work = &req->work;
-		io_close_finish(&work);
-		if (work && work != old_work)
-			*nxt = container_of(work, struct io_kiocb, work);
-		return 0;
-	}
-
+	__io_close_finish(req, nxt);
+	return 0;
 eagain:
 	req->work.func = io_close_finish;
 	/*
@@ -2960,24 +2996,12 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
 	return 0;
 }
 
-#if defined(CONFIG_NET)
-static void io_sendrecv_async(struct io_wq_work **workptr)
-{
-	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-	struct iovec *iov = NULL;
-
-	if (req->io->rw.iov != req->io->rw.fast_iov)
-		iov = req->io->msg.iov;
-	io_wq_submit_work(workptr);
-	kfree(iov);
-}
-#endif
-
 static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_NET)
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct io_async_ctx *io = req->io;
+	int ret;
 
 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -2985,10 +3009,16 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!io || req->opcode == IORING_OP_SEND)
 		return 0;
+	/* iovec is already imported */
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
 
 	io->msg.iov = io->msg.fast_iov;
-	return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+	ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
 					&io->msg.iov);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -3008,12 +3038,11 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 	sock = sock_from_file(req->file, &ret);
 	if (sock) {
 		struct io_async_ctx io;
-		struct sockaddr_storage addr;
 		unsigned flags;
 
 		if (req->io) {
 			kmsg = &req->io->msg;
-			kmsg->msg.msg_name = &addr;
+			kmsg->msg.msg_name = &req->io->msg.addr;
 			/* if iov is set, it's allocated already */
 			if (!kmsg->iov)
 				kmsg->iov = kmsg->fast_iov;
@@ -3022,7 +3051,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 			struct io_sr_msg *sr = &req->sr_msg;
 
 			kmsg = &io.msg;
-			kmsg->msg.msg_name = &addr;
+			kmsg->msg.msg_name = &io.msg.addr;
 
 			io.msg.iov = io.msg.fast_iov;
 			ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
@@ -3041,18 +3070,22 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 		if (force_nonblock && ret == -EAGAIN) {
 			if (req->io)
 				return -EAGAIN;
-			if (io_alloc_async_ctx(req))
+			if (io_alloc_async_ctx(req)) {
+				if (kmsg && kmsg->iov != kmsg->fast_iov)
+					kfree(kmsg->iov);
 				return -ENOMEM;
+			}
+			req->flags |= REQ_F_NEED_CLEANUP;
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
-			req->work.func = io_sendrecv_async;
 			return -EAGAIN;
 		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 	}
 
-	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
@@ -3120,6 +3153,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 #if defined(CONFIG_NET)
 	struct io_sr_msg *sr = &req->sr_msg;
 	struct io_async_ctx *io = req->io;
+	int ret;
 
 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 	sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -3127,10 +3161,16 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 
 	if (!io || req->opcode == IORING_OP_RECV)
 		return 0;
+	/* iovec is already imported */
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		return 0;
 
 	io->msg.iov = io->msg.fast_iov;
-	return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+	ret = recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
 					&io->msg.uaddr, &io->msg.iov);
+	if (!ret)
+		req->flags |= REQ_F_NEED_CLEANUP;
+	return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
@@ -3150,12 +3190,11 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 	sock = sock_from_file(req->file, &ret);
 	if (sock) {
 		struct io_async_ctx io;
-		struct sockaddr_storage addr;
 		unsigned flags;
 
 		if (req->io) {
 			kmsg = &req->io->msg;
-			kmsg->msg.msg_name = &addr;
+			kmsg->msg.msg_name = &req->io->msg.addr;
 			/* if iov is set, it's allocated already */
 			if (!kmsg->iov)
 				kmsg->iov = kmsg->fast_iov;
@@ -3164,7 +3203,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 			struct io_sr_msg *sr = &req->sr_msg;
 
 			kmsg = &io.msg;
-			kmsg->msg.msg_name = &addr;
+			kmsg->msg.msg_name = &io.msg.addr;
 
 			io.msg.iov = io.msg.fast_iov;
 			ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
@@ -3185,18 +3224,22 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 		if (force_nonblock && ret == -EAGAIN) {
 			if (req->io)
 				return -EAGAIN;
-			if (io_alloc_async_ctx(req))
+			if (io_alloc_async_ctx(req)) {
+				if (kmsg && kmsg->iov != kmsg->fast_iov)
+					kfree(kmsg->iov);
 				return -ENOMEM;
+			}
 			memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
-			req->work.func = io_sendrecv_async;
+			req->flags |= REQ_F_NEED_CLEANUP;
 			return -EAGAIN;
 		}
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 	}
 
-	if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
+	req->flags &= ~REQ_F_NEED_CLEANUP;
 	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
@@ -4207,6 +4250,35 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return -EIOCBQUEUED;
 }
 
+static void io_cleanup_req(struct io_kiocb *req)
+{
+	struct io_async_ctx *io = req->io;
+
+	switch (req->opcode) {
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_READ:
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+	case IORING_OP_WRITE:
+		if (io->rw.iov != io->rw.fast_iov)
+			kfree(io->rw.iov);
+		break;
+	case IORING_OP_SENDMSG:
+	case IORING_OP_RECVMSG:
+		if (io->msg.iov != io->msg.fast_iov)
+			kfree(io->msg.iov);
+		break;
+	case IORING_OP_OPENAT:
+	case IORING_OP_OPENAT2:
+	case IORING_OP_STATX:
+		putname(req->open.filename);
+		break;
+	}
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+}
+
 static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			struct io_kiocb **nxt, bool force_nonblock)
 {
@@ -4446,7 +4518,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 	}
 
 	if (!ret) {
-		req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
 		req->in_async = true;
 		do {
 			ret = io_issue_sqe(req, NULL, &nxt, false);
@@ -4479,7 +4550,7 @@ static int io_req_needs_file(struct io_kiocb *req, int fd)
 {
 	if (!io_op_defs[req->opcode].needs_file)
 		return 0;
-	if (fd == -1 && io_op_defs[req->opcode].fd_non_neg)
+	if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
 		return 0;
 	return 1;
 }
@@ -4950,6 +5021,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	for (i = 0; i < nr; i++) {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
+		int err;
 
 		req = io_get_req(ctx, statep);
 		if (unlikely(!req)) {
@@ -4966,20 +5038,23 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		submitted++;
 
 		if (unlikely(req->opcode >= IORING_OP_LAST)) {
-			io_cqring_add_event(req, -EINVAL);
+			err = -EINVAL;
+fail_req:
+			io_cqring_add_event(req, err);
 			io_double_put_req(req);
 			break;
 		}
 
 		if (io_op_defs[req->opcode].needs_mm && !*mm) {
 			mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
-			if (!mm_fault) {
-				use_mm(ctx->sqo_mm);
-				*mm = ctx->sqo_mm;
+			if (unlikely(mm_fault)) {
+				err = -EFAULT;
+				goto fail_req;
 			}
+			use_mm(ctx->sqo_mm);
+			*mm = ctx->sqo_mm;
 		}
 
-		req->has_user = *mm != NULL;
 		req->in_async = async;
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
@@ -6301,7 +6376,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
 	    ctx->rings->sq_ring_entries)
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
+	if (io_cqring_events(ctx, false))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
@@ -6393,6 +6468,29 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 		if (!cancel_req)
 			break;
 
+		if (cancel_req->flags & REQ_F_OVERFLOW) {
+			spin_lock_irq(&ctx->completion_lock);
+			list_del(&cancel_req->list);
+			cancel_req->flags &= ~REQ_F_OVERFLOW;
+			if (list_empty(&ctx->cq_overflow_list)) {
+				clear_bit(0, &ctx->sq_check_overflow);
+				clear_bit(0, &ctx->cq_check_overflow);
+			}
+			spin_unlock_irq(&ctx->completion_lock);
+
+			WRITE_ONCE(ctx->rings->cq_overflow,
+				atomic_inc_return(&ctx->cached_cq_overflow));
+
+			/*
+			 * Put inflight ref and overflow ref. If that's
+			 * all we had, then we're done with this request.
+			 */
+			if (refcount_sub_and_test(2, &cancel_req->refs)) {
+				io_put_req(cancel_req);
+				continue;
+			}
+		}
+
 		io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
 		io_put_req(cancel_req);
 		schedule();
@@ -6405,6 +6503,13 @@ static int io_uring_flush(struct file *file, void *data)
 	struct io_ring_ctx *ctx = file->private_data;
 
 	io_uring_cancel_files(ctx, data);
+
+	/*
+	 * If the task is going away, cancel work it may have pending
+	 */
+	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+		io_wq_cancel_pid(ctx->io_wq, task_pid_vnr(current));
+
	return 0;
 }
Loading…
Reference in New Issue