io_uring-5.10-2020-10-24
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+UQh8QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpl7WEADOTslFOof1RUPMb0Qvj4GO4cjvoFLW7KLt B83PmlW3WJpZrSiqZlrSPwcDELVphw67RL/2hp0jAfT1t00OdCOYQDmh7+kg9lnI fzu4NzfTKbriRWEtodIqZCiDoGXjzJGxNffhxPEt33YxRErI/fvuD/TzxwGGUInW OZ3Aze9Nj2DQ/eXhio48n4letTK6xNsjGDWvzwinthHWeBbID01isLlTei20PKU5 Dk1buueUuEr/vNjJwEeRd8yDXZeLZ/br3gw/3B71MJoi2PUaXvuS8DV4LmXg2SS5 yN0udSNk4AP/UlrVqN9bEqdbSTBSf2JIEW3k3/SEUjcjw6hMnbLeoW2vZx6Xvk6T vvAVHesLpCu8oEdWAkFm6Rb6ptJ1XpRrWWYxi1J1SB2Y8cGyGS1GoZWWPknM5M3I b1dNj18Bb+MmFvuKr7YYrb77tECuywxTHVGj6WwBOIlYrg44XQOumYYH9OmvZFz1 6vWaXjLPOIM8fpAKX5Tx5sAy/FMl17H8I5AD2bZVvD0h0MqzLnvHEYahcAfOfb9y qpkdGnbAWo6IIkCrDcSOV4q6dmWu3as9eSs1j/6Xl4WoJ2MT9C//Gpv7iNMxxozy CznEPcbA8N9QazQmoebtB3gTBVyGUUKVDdVNzleMj9KD6yPlKFZ6+FZdikX59I9M t9QGh3+gow== =xidc -----END PGP SIGNATURE----- Merge tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block Pull io_uring fixes from Jens Axboe: - fsize was missed in previous unification of work flags - Few fixes cleaning up the flags unification creds cases (Pavel) - Fix NUMA affinities for completely unplugged/replugged node for io-wq - Two fallout fixes from the set_fs changes. One local to io_uring, one for the splice entry point that io_uring uses. 
- Linked timeout fixes (Pavel)
- Removal of ->flush() ->files work-around that we don't need anymore with referenced files (Pavel)
- Various cleanups (Pavel)

* tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block:
  splice: change exported internal do_splice() helper to take kernel offset
  io_uring: make loop_rw_iter() use original user supplied pointers
  io_uring: remove req cancel in ->flush()
  io-wq: re-set NUMA node affinities if CPUs come online
  io_uring: don't reuse linked_timeout
  io_uring: unify fsize with def->work_flags
  io_uring: fix racy REQ_F_LINK_TIMEOUT clearing
  io_uring: do poll's hash_node init in common code
  io_uring: inline io_poll_task_handler()
  io_uring: remove extra ->file check in poll prep
  io_uring: make cached_cq_overflow non atomic_t
  io_uring: inline io_fail_links()
  io_uring: kill ref get/drop in personality init
  io_uring: flags-based creds init in queue
This commit is contained in:
commit
af0041875c
68
fs/io-wq.c
68
fs/io-wq.c
|
@ -19,7 +19,9 @@
|
|||
#include <linux/task_work.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include "../kernel/sched/sched.h"
|
||||
#include "io-wq.h"
|
||||
|
||||
#define WORKER_IDLE_TIMEOUT (5 * HZ)
|
||||
|
@ -123,9 +125,13 @@ struct io_wq {
|
|||
refcount_t refs;
|
||||
struct completion done;
|
||||
|
||||
struct hlist_node cpuhp_node;
|
||||
|
||||
refcount_t use_refs;
|
||||
};
|
||||
|
||||
static enum cpuhp_state io_wq_online;
|
||||
|
||||
static bool io_worker_get(struct io_worker *worker)
|
||||
{
|
||||
return refcount_inc_not_zero(&worker->ref);
|
||||
|
@ -187,7 +193,8 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
|
|||
worker->blkcg_css = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
|
||||
return dropped_lock;
|
||||
}
|
||||
|
||||
|
@ -483,7 +490,10 @@ static void io_impersonate_work(struct io_worker *worker,
|
|||
if ((work->flags & IO_WQ_WORK_CREDS) &&
|
||||
worker->cur_creds != work->identity->creds)
|
||||
io_wq_switch_creds(worker, work);
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
|
||||
if (work->flags & IO_WQ_WORK_FSIZE)
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
|
||||
else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
|
||||
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
|
||||
io_wq_switch_blkcg(worker, work);
|
||||
#ifdef CONFIG_AUDIT
|
||||
current->loginuid = work->identity->loginuid;
|
||||
|
@ -1087,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
|||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
|
||||
if (!wq->wqes) {
|
||||
kfree(wq);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
if (!wq->wqes)
|
||||
goto err_wq;
|
||||
|
||||
ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
|
||||
if (ret)
|
||||
goto err_wqes;
|
||||
|
||||
wq->free_work = data->free_work;
|
||||
wq->do_work = data->do_work;
|
||||
|
@ -1098,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
|||
/* caller must already hold a reference to this */
|
||||
wq->user = data->user;
|
||||
|
||||
ret = -ENOMEM;
|
||||
for_each_node(node) {
|
||||
struct io_wqe *wqe;
|
||||
int alloc_node = node;
|
||||
|
@ -1141,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
|||
ret = PTR_ERR(wq->manager);
|
||||
complete(&wq->done);
|
||||
err:
|
||||
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
|
||||
for_each_node(node)
|
||||
kfree(wq->wqes[node]);
|
||||
err_wqes:
|
||||
kfree(wq->wqes);
|
||||
err_wq:
|
||||
kfree(wq);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
@ -1160,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq)
|
|||
{
|
||||
int node;
|
||||
|
||||
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
|
||||
|
||||
set_bit(IO_WQ_BIT_EXIT, &wq->state);
|
||||
if (wq->manager)
|
||||
kthread_stop(wq->manager);
|
||||
|
@ -1187,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
|
|||
{
|
||||
return wq->manager;
|
||||
}
|
||||
|
||||
/*
 * Per-worker callback handed to io_wq_for_each_worker() by io_wq_cpu_online().
 *
 * With the worker task's runqueue locked (task_rq_lock/task_rq_unlock), sets
 * the task's allowed CPUs to the CPU mask of the NUMA node stored in
 * worker->wqe->node, then sets PF_NO_SETAFFINITY in the task's flags.
 *
 * @data is not used. Always returns false.
 * NOTE(review): presumably a false return tells io_wq_for_each_worker() to
 * keep iterating; that helper is not visible here, so confirm.
 */
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
|
||||
{
|
||||
struct task_struct *task = worker->task;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(task, &rf);
|
||||
do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
|
||||
task->flags |= PF_NO_SETAFFINITY;
|
||||
task_rq_unlock(rq, task, &rf);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * CPU hotplug callback registered by io_wq_init().
 *
 * Recovers the io_wq that embeds @node as its cpuhp_node, then, inside an RCU
 * read-side section, calls io_wq_for_each_worker() with
 * io_wq_worker_affinity() on wq->wqes[i] for every node i.
 *
 * @cpu is not used. Always returns 0.
 */
static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_node(i)
|
||||
io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Init-time setup, run through subsys_initcall().
 *
 * Calls cpuhp_setup_state_multi() with CPUHP_AP_ONLINE_DYN and the name
 * "io-wq/online", passing io_wq_cpu_online and NULL as its callbacks.
 * A negative return value is propagated as the error. Otherwise the returned
 * value is saved in io_wq_online, which io_wq_create() and __io_wq_destroy()
 * pass to cpuhp_state_add_instance_nocalls() and
 * cpuhp_state_remove_instance_nocalls(), and 0 is returned.
 */
static __init int io_wq_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
|
||||
io_wq_cpu_online, NULL);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
io_wq_online = ret;
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(io_wq_init);
|
||||
|
|
|
@ -17,6 +17,7 @@ enum {
|
|||
IO_WQ_WORK_MM = 128,
|
||||
IO_WQ_WORK_CREDS = 256,
|
||||
IO_WQ_WORK_BLKCG = 512,
|
||||
IO_WQ_WORK_FSIZE = 1024,
|
||||
|
||||
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
|
||||
};
|
||||
|
|
177
fs/io_uring.c
177
fs/io_uring.c
|
@ -277,7 +277,7 @@ struct io_ring_ctx {
|
|||
unsigned sq_mask;
|
||||
unsigned sq_thread_idle;
|
||||
unsigned cached_sq_dropped;
|
||||
atomic_t cached_cq_overflow;
|
||||
unsigned cached_cq_overflow;
|
||||
unsigned long sq_check_overflow;
|
||||
|
||||
struct list_head defer_list;
|
||||
|
@ -585,6 +585,7 @@ enum {
|
|||
REQ_F_BUFFER_SELECTED_BIT,
|
||||
REQ_F_NO_FILE_TABLE_BIT,
|
||||
REQ_F_WORK_INITIALIZED_BIT,
|
||||
REQ_F_LTIMEOUT_ACTIVE_BIT,
|
||||
|
||||
/* not a real bit, just to check we're not overflowing the space */
|
||||
__REQ_F_LAST_BIT,
|
||||
|
@ -614,7 +615,7 @@ enum {
|
|||
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
|
||||
/* must not punt to workers */
|
||||
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
|
||||
/* has linked timeout */
|
||||
/* has or had linked timeout */
|
||||
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
|
||||
/* regular file */
|
||||
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
|
||||
|
@ -628,6 +629,8 @@ enum {
|
|||
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
|
||||
/* io_wq_work is initialized */
|
||||
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
|
||||
/* linked timeout is active, i.e. prepared by link's head */
|
||||
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
|
||||
};
|
||||
|
||||
struct async_poll {
|
||||
|
@ -750,8 +753,6 @@ struct io_op_def {
|
|||
unsigned pollout : 1;
|
||||
/* op supports buffer selection */
|
||||
unsigned buffer_select : 1;
|
||||
/* needs rlimit(RLIMIT_FSIZE) assigned */
|
||||
unsigned needs_fsize : 1;
|
||||
/* must always have async data allocated */
|
||||
unsigned needs_async_data : 1;
|
||||
/* size of async data needed, if any */
|
||||
|
@ -775,10 +776,10 @@ static const struct io_op_def io_op_defs[] = {
|
|||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.needs_fsize = 1,
|
||||
.needs_async_data = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
|
||||
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
|
||||
IO_WQ_WORK_FSIZE,
|
||||
},
|
||||
[IORING_OP_FSYNC] = {
|
||||
.needs_file = 1,
|
||||
|
@ -789,16 +790,16 @@ static const struct io_op_def io_op_defs[] = {
|
|||
.unbound_nonreg_file = 1,
|
||||
.pollin = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.work_flags = IO_WQ_WORK_BLKCG,
|
||||
.work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
|
||||
},
|
||||
[IORING_OP_WRITE_FIXED] = {
|
||||
.needs_file = 1,
|
||||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.needs_fsize = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.work_flags = IO_WQ_WORK_BLKCG,
|
||||
.work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
|
||||
IO_WQ_WORK_MM,
|
||||
},
|
||||
[IORING_OP_POLL_ADD] = {
|
||||
.needs_file = 1,
|
||||
|
@ -856,8 +857,7 @@ static const struct io_op_def io_op_defs[] = {
|
|||
},
|
||||
[IORING_OP_FALLOCATE] = {
|
||||
.needs_file = 1,
|
||||
.needs_fsize = 1,
|
||||
.work_flags = IO_WQ_WORK_BLKCG,
|
||||
.work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
|
||||
},
|
||||
[IORING_OP_OPENAT] = {
|
||||
.work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
|
||||
|
@ -887,9 +887,9 @@ static const struct io_op_def io_op_defs[] = {
|
|||
.needs_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.needs_fsize = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
|
||||
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
|
||||
IO_WQ_WORK_FSIZE,
|
||||
},
|
||||
[IORING_OP_FADVISE] = {
|
||||
.needs_file = 1,
|
||||
|
@ -1070,6 +1070,12 @@ static void io_init_identity(struct io_identity *id)
|
|||
refcount_set(&id->count, 1);
|
||||
}
|
||||
|
||||
/*
 * Unconditional part of async-work initialisation: zeroes req->work and sets
 * REQ_F_WORK_INITIALIZED in req->flags.
 *
 * It does not check whether the flag is already set and does not assign
 * req->work.identity; the callers visible here (io_req_init_async() and the
 * personality path in io_init_req()) set the identity themselves afterwards.
 */
static inline void __io_req_init_async(struct io_kiocb *req)
|
||||
{
|
||||
memset(&req->work, 0, sizeof(req->work));
|
||||
req->flags |= REQ_F_WORK_INITIALIZED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: must call io_req_init_async() for the first time you
|
||||
* touch any members of io_wq_work.
|
||||
|
@ -1081,8 +1087,7 @@ static inline void io_req_init_async(struct io_kiocb *req)
|
|||
if (req->flags & REQ_F_WORK_INITIALIZED)
|
||||
return;
|
||||
|
||||
memset(&req->work, 0, sizeof(req->work));
|
||||
req->flags |= REQ_F_WORK_INITIALIZED;
|
||||
__io_req_init_async(req);
|
||||
|
||||
/* Grab a ref if this isn't our static identity */
|
||||
req->work.identity = tctx->identity;
|
||||
|
@ -1174,7 +1179,7 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
|
|||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
return seq != ctx->cached_cq_tail
|
||||
+ atomic_read(&ctx->cached_cq_overflow);
|
||||
+ READ_ONCE(ctx->cached_cq_overflow);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -1285,8 +1290,11 @@ static bool io_grab_identity(struct io_kiocb *req)
|
|||
struct io_identity *id = req->work.identity;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE))
|
||||
return false;
|
||||
if (def->work_flags & IO_WQ_WORK_FSIZE) {
|
||||
if (id->fsize != rlimit(RLIMIT_FSIZE))
|
||||
return false;
|
||||
req->work.flags |= IO_WQ_WORK_FSIZE;
|
||||
}
|
||||
|
||||
if (!(req->work.flags & IO_WQ_WORK_FILES) &&
|
||||
(def->work_flags & IO_WQ_WORK_FILES) &&
|
||||
|
@ -1619,8 +1627,9 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
|
|||
WRITE_ONCE(cqe->res, req->result);
|
||||
WRITE_ONCE(cqe->flags, req->compl.cflags);
|
||||
} else {
|
||||
ctx->cached_cq_overflow++;
|
||||
WRITE_ONCE(ctx->rings->cq_overflow,
|
||||
atomic_inc_return(&ctx->cached_cq_overflow));
|
||||
ctx->cached_cq_overflow);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1662,8 +1671,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
|
|||
* then we cannot store the request for later flushing, we need
|
||||
* to drop it on the floor.
|
||||
*/
|
||||
WRITE_ONCE(ctx->rings->cq_overflow,
|
||||
atomic_inc_return(&ctx->cached_cq_overflow));
|
||||
ctx->cached_cq_overflow++;
|
||||
WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
|
||||
} else {
|
||||
if (list_empty(&ctx->cq_overflow_list)) {
|
||||
set_bit(0, &ctx->sq_check_overflow);
|
||||
|
@ -1865,6 +1874,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
|
|||
link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
|
||||
if (link->opcode != IORING_OP_LINK_TIMEOUT)
|
||||
return false;
|
||||
/*
|
||||
* Can happen if a linked timeout fired and link had been like
|
||||
* req -> link t-out -> link t-out [-> ...]
|
||||
*/
|
||||
if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
|
||||
return false;
|
||||
|
||||
list_del_init(&link->link_list);
|
||||
wake_ev = io_link_cancel_timeout(link);
|
||||
|
@ -1908,10 +1923,12 @@ static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
|
|||
/*
|
||||
* Called if REQ_F_LINK_HEAD is set, and we fail the head request
|
||||
*/
|
||||
static void __io_fail_links(struct io_kiocb *req)
|
||||
static void io_fail_links(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
while (!list_empty(&req->link_list)) {
|
||||
struct io_kiocb *link = list_first_entry(&req->link_list,
|
||||
struct io_kiocb, link_list);
|
||||
|
@ -1933,15 +1950,6 @@ static void __io_fail_links(struct io_kiocb *req)
|
|||
}
|
||||
|
||||
io_commit_cqring(ctx);
|
||||
}
|
||||
|
||||
static void io_fail_links(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ctx->completion_lock, flags);
|
||||
__io_fail_links(req);
|
||||
spin_unlock_irqrestore(&ctx->completion_lock, flags);
|
||||
|
||||
io_cqring_ev_posted(ctx);
|
||||
|
@ -3109,9 +3117,10 @@ static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
|
|||
* For files that don't have ->read_iter() and ->write_iter(), handle them
|
||||
* by looping over ->read() or ->write() manually.
|
||||
*/
|
||||
static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
|
||||
struct iov_iter *iter)
|
||||
static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
|
||||
{
|
||||
struct kiocb *kiocb = &req->rw.kiocb;
|
||||
struct file *file = req->file;
|
||||
ssize_t ret = 0;
|
||||
|
||||
/*
|
||||
|
@ -3131,11 +3140,8 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
|
|||
if (!iov_iter_is_bvec(iter)) {
|
||||
iovec = iov_iter_iovec(iter);
|
||||
} else {
|
||||
/* fixed buffers import bvec */
|
||||
iovec.iov_base = kmap(iter->bvec->bv_page)
|
||||
+ iter->iov_offset;
|
||||
iovec.iov_len = min(iter->count,
|
||||
iter->bvec->bv_len - iter->iov_offset);
|
||||
iovec.iov_base = u64_to_user_ptr(req->rw.addr);
|
||||
iovec.iov_len = req->rw.len;
|
||||
}
|
||||
|
||||
if (rw == READ) {
|
||||
|
@ -3146,9 +3152,6 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
|
|||
iovec.iov_len, io_kiocb_ppos(kiocb));
|
||||
}
|
||||
|
||||
if (iov_iter_is_bvec(iter))
|
||||
kunmap(iter->bvec->bv_page);
|
||||
|
||||
if (nr < 0) {
|
||||
if (!ret)
|
||||
ret = nr;
|
||||
|
@ -3157,6 +3160,8 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
|
|||
ret += nr;
|
||||
if (nr != iovec.iov_len)
|
||||
break;
|
||||
req->rw.len -= nr;
|
||||
req->rw.addr += nr;
|
||||
iov_iter_advance(iter, nr);
|
||||
}
|
||||
|
||||
|
@ -3346,7 +3351,7 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
|
|||
if (req->file->f_op->read_iter)
|
||||
return call_read_iter(req->file, &req->rw.kiocb, iter);
|
||||
else if (req->file->f_op->read)
|
||||
return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
|
||||
return loop_rw_iter(READ, req, iter);
|
||||
else
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -3537,7 +3542,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
|
|||
if (req->file->f_op->write_iter)
|
||||
ret2 = call_write_iter(req->file, kiocb, iter);
|
||||
else if (req->file->f_op->write)
|
||||
ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
|
||||
ret2 = loop_rw_iter(WRITE, req, iter);
|
||||
else
|
||||
ret2 = -EINVAL;
|
||||
|
||||
|
@ -4927,32 +4932,25 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
|
|||
io_commit_cqring(ctx);
|
||||
}
|
||||
|
||||
static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
if (io_poll_rewait(req, &req->poll)) {
|
||||
spin_unlock_irq(&ctx->completion_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
hash_del(&req->hash_node);
|
||||
io_poll_complete(req, req->result, 0);
|
||||
spin_unlock_irq(&ctx->completion_lock);
|
||||
|
||||
*nxt = io_put_req_find_next(req);
|
||||
io_cqring_ev_posted(ctx);
|
||||
}
|
||||
|
||||
static void io_poll_task_func(struct callback_head *cb)
|
||||
{
|
||||
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_kiocb *nxt = NULL;
|
||||
struct io_kiocb *nxt;
|
||||
|
||||
if (io_poll_rewait(req, &req->poll)) {
|
||||
spin_unlock_irq(&ctx->completion_lock);
|
||||
} else {
|
||||
hash_del(&req->hash_node);
|
||||
io_poll_complete(req, req->result, 0);
|
||||
spin_unlock_irq(&ctx->completion_lock);
|
||||
|
||||
nxt = io_put_req_find_next(req);
|
||||
io_cqring_ev_posted(ctx);
|
||||
if (nxt)
|
||||
__io_req_task_submit(nxt);
|
||||
}
|
||||
|
||||
io_poll_task_handler(req, &nxt);
|
||||
if (nxt)
|
||||
__io_req_task_submit(nxt);
|
||||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
|
||||
|
@ -5106,6 +5104,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
|
|||
struct io_ring_ctx *ctx = req->ctx;
|
||||
bool cancel = false;
|
||||
|
||||
INIT_HLIST_NODE(&req->hash_node);
|
||||
io_init_poll_iocb(poll, mask, wake_func);
|
||||
poll->file = req->file;
|
||||
poll->wait.private = req;
|
||||
|
@ -5167,7 +5166,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
|
|||
|
||||
req->flags |= REQ_F_POLLED;
|
||||
req->apoll = apoll;
|
||||
INIT_HLIST_NODE(&req->hash_node);
|
||||
|
||||
mask = 0;
|
||||
if (def->pollin)
|
||||
|
@ -5349,8 +5347,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
|
|||
return -EINVAL;
|
||||
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
|
||||
return -EINVAL;
|
||||
if (!poll->file)
|
||||
return -EBADF;
|
||||
|
||||
events = READ_ONCE(sqe->poll32_events);
|
||||
#ifdef __BIG_ENDIAN
|
||||
|
@ -5368,7 +5364,6 @@ static int io_poll_add(struct io_kiocb *req)
|
|||
struct io_poll_table ipt;
|
||||
__poll_t mask;
|
||||
|
||||
INIT_HLIST_NODE(&req->hash_node);
|
||||
ipt.pt._qproc = io_poll_queue_proc;
|
||||
|
||||
mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
|
||||
|
@ -6118,10 +6113,9 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
|
|||
if (!list_empty(&req->link_list)) {
|
||||
prev = list_entry(req->link_list.prev, struct io_kiocb,
|
||||
link_list);
|
||||
if (refcount_inc_not_zero(&prev->refs)) {
|
||||
if (refcount_inc_not_zero(&prev->refs))
|
||||
list_del_init(&req->link_list);
|
||||
prev->flags &= ~REQ_F_LINK_TIMEOUT;
|
||||
} else
|
||||
else
|
||||
prev = NULL;
|
||||
}
|
||||
|
||||
|
@ -6178,6 +6172,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
|
|||
if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
|
||||
return NULL;
|
||||
|
||||
nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
|
||||
req->flags |= REQ_F_LINK_TIMEOUT;
|
||||
return nxt;
|
||||
}
|
||||
|
@ -6192,7 +6187,8 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
|
|||
again:
|
||||
linked_timeout = io_prep_linked_timeout(req);
|
||||
|
||||
if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds &&
|
||||
if ((req->flags & REQ_F_WORK_INITIALIZED) &&
|
||||
(req->work.flags & IO_WQ_WORK_CREDS) &&
|
||||
req->work.identity->creds != current_cred()) {
|
||||
if (old_creds)
|
||||
revert_creds(old_creds);
|
||||
|
@ -6200,7 +6196,6 @@ again:
|
|||
old_creds = NULL; /* restored original creds */
|
||||
else
|
||||
old_creds = override_creds(req->work.identity->creds);
|
||||
req->work.flags |= IO_WQ_WORK_CREDS;
|
||||
}
|
||||
|
||||
ret = io_issue_sqe(req, true, cs);
|
||||
|
@ -6241,8 +6236,10 @@ punt:
|
|||
if (nxt) {
|
||||
req = nxt;
|
||||
|
||||
if (req->flags & REQ_F_FORCE_ASYNC)
|
||||
if (req->flags & REQ_F_FORCE_ASYNC) {
|
||||
linked_timeout = NULL;
|
||||
goto punt;
|
||||
}
|
||||
goto again;
|
||||
}
|
||||
exit:
|
||||
|
@ -6505,12 +6502,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|||
if (id) {
|
||||
struct io_identity *iod;
|
||||
|
||||
io_req_init_async(req);
|
||||
iod = idr_find(&ctx->personality_idr, id);
|
||||
if (unlikely(!iod))
|
||||
return -EINVAL;
|
||||
refcount_inc(&iod->count);
|
||||
io_put_identity(current->io_uring, req);
|
||||
|
||||
__io_req_init_async(req);
|
||||
get_cred(iod->creds);
|
||||
req->work.identity = iod;
|
||||
req->work.flags |= IO_WQ_WORK_CREDS;
|
||||
|
@ -8686,19 +8683,11 @@ static void io_uring_del_task_file(struct file *file)
|
|||
fput(file);
|
||||
}
|
||||
|
||||
static void __io_uring_attempt_task_drop(struct file *file)
|
||||
{
|
||||
struct file *old = xa_load(¤t->io_uring->xa, (unsigned long)file);
|
||||
|
||||
if (old == file)
|
||||
io_uring_del_task_file(file);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop task note for this file if we're the only ones that hold it after
|
||||
* pending fput()
|
||||
*/
|
||||
static void io_uring_attempt_task_drop(struct file *file, bool exiting)
|
||||
static void io_uring_attempt_task_drop(struct file *file)
|
||||
{
|
||||
if (!current->io_uring)
|
||||
return;
|
||||
|
@ -8706,10 +8695,9 @@ static void io_uring_attempt_task_drop(struct file *file, bool exiting)
|
|||
* fput() is pending, will be 2 if the only other ref is our potential
|
||||
* task file note. If the task is exiting, drop regardless of count.
|
||||
*/
|
||||
if (!exiting && atomic_long_read(&file->f_count) != 2)
|
||||
return;
|
||||
|
||||
__io_uring_attempt_task_drop(file);
|
||||
if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
|
||||
atomic_long_read(&file->f_count) == 2)
|
||||
io_uring_del_task_file(file);
|
||||
}
|
||||
|
||||
void __io_uring_files_cancel(struct files_struct *files)
|
||||
|
@ -8767,16 +8755,7 @@ void __io_uring_task_cancel(void)
|
|||
|
||||
static int io_uring_flush(struct file *file, void *data)
|
||||
{
|
||||
struct io_ring_ctx *ctx = file->private_data;
|
||||
|
||||
/*
|
||||
* If the task is going away, cancel work it may have pending
|
||||
*/
|
||||
if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
|
||||
data = NULL;
|
||||
|
||||
io_uring_cancel_task_requests(ctx, data);
|
||||
io_uring_attempt_task_drop(file, !data);
|
||||
io_uring_attempt_task_drop(file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
63
fs/splice.c
63
fs/splice.c
|
@ -1005,9 +1005,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
|
|||
/*
|
||||
* Determine where to splice to/from.
|
||||
*/
|
||||
long do_splice(struct file *in, loff_t __user *off_in,
|
||||
struct file *out, loff_t __user *off_out,
|
||||
size_t len, unsigned int flags)
|
||||
long do_splice(struct file *in, loff_t *off_in, struct file *out,
|
||||
loff_t *off_out, size_t len, unsigned int flags)
|
||||
{
|
||||
struct pipe_inode_info *ipipe;
|
||||
struct pipe_inode_info *opipe;
|
||||
|
@ -1041,8 +1040,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
|
|||
if (off_out) {
|
||||
if (!(out->f_mode & FMODE_PWRITE))
|
||||
return -EINVAL;
|
||||
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
offset = *off_out;
|
||||
} else {
|
||||
offset = out->f_pos;
|
||||
}
|
||||
|
@ -1063,8 +1061,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
|
|||
|
||||
if (!off_out)
|
||||
out->f_pos = offset;
|
||||
else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
*off_out = offset;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1075,8 +1073,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
|
|||
if (off_in) {
|
||||
if (!(in->f_mode & FMODE_PREAD))
|
||||
return -EINVAL;
|
||||
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
offset = *off_in;
|
||||
} else {
|
||||
offset = in->f_pos;
|
||||
}
|
||||
|
@ -1100,8 +1097,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
|
|||
wakeup_pipe_readers(opipe);
|
||||
if (!off_in)
|
||||
in->f_pos = offset;
|
||||
else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
*off_in = offset;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1109,6 +1106,46 @@ long do_splice(struct file *in, loff_t __user *off_in,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * User-pointer front end for do_splice(), called from the splice() syscall
 * definition. do_splice() itself takes kernel loff_t pointers.
 *
 * Steps:
 *  - returns -ESPIPE if an offset pointer is supplied for an end that
 *    get_pipe_info() reports as a pipe;
 *  - copies each supplied user offset into the local 'offset' with
 *    copy_from_user() (-EFAULT on failure) and passes its address on;
 *  - calls do_splice() and returns its result unchanged if negative;
 *  - on success copies the offset back to user space with copy_to_user(),
 *    returning -EFAULT if that fails, otherwise do_splice()'s return value.
 *
 * NOTE(review): both directions share the single 'offset' local. This looks
 * safe only if do_splice() never succeeds when both off_in and off_out are
 * non-NULL (i.e. when neither end is a pipe) - confirm against do_splice().
 */
static long __do_splice(struct file *in, loff_t __user *off_in,
|
||||
struct file *out, loff_t __user *off_out,
|
||||
size_t len, unsigned int flags)
|
||||
{
|
||||
struct pipe_inode_info *ipipe;
|
||||
struct pipe_inode_info *opipe;
|
||||
loff_t offset, *__off_in = NULL, *__off_out = NULL;
|
||||
long ret;
|
||||
|
||||
ipipe = get_pipe_info(in, true);
|
||||
opipe = get_pipe_info(out, true);
|
||||
|
||||
if (ipipe && off_in)
|
||||
return -ESPIPE;
|
||||
if (opipe && off_out)
|
||||
return -ESPIPE;
|
||||
|
||||
if (off_out) {
|
||||
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
__off_out = &offset;
|
||||
}
|
||||
if (off_in) {
|
||||
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
__off_in = &offset;
|
||||
}
|
||||
|
||||
ret = do_splice(in, __off_in, out, __off_out, len, flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
|
||||
return -EFAULT;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iter_to_pipe(struct iov_iter *from,
|
||||
struct pipe_inode_info *pipe,
|
||||
unsigned flags)
|
||||
|
@ -1303,8 +1340,8 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
|
|||
if (in.file) {
|
||||
out = fdget(fd_out);
|
||||
if (out.file) {
|
||||
error = do_splice(in.file, off_in, out.file, off_out,
|
||||
len, flags);
|
||||
error = __do_splice(in.file, off_in, out.file, off_out,
|
||||
len, flags);
|
||||
fdput(out);
|
||||
}
|
||||
fdput(in);
|
||||
|
|
|
@ -78,8 +78,8 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
|
|||
struct pipe_buffer *);
|
||||
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
|
||||
splice_direct_actor *);
|
||||
extern long do_splice(struct file *in, loff_t __user *off_in,
|
||||
struct file *out, loff_t __user *off_out,
|
||||
extern long do_splice(struct file *in, loff_t *off_in,
|
||||
struct file *out, loff_t *off_out,
|
||||
size_t len, unsigned int flags);
|
||||
|
||||
extern long do_tee(struct file *in, struct file *out, size_t len,
|
||||
|
|
Loading…
Reference in New Issue