for-5.7/io_uring-2020-03-29

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl6BJEMQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpie7D/9gN4zhykYDfcgamfxMtTbpla2PdTnWoJxP
fjy/Nx2FySakmccaiCGQSQ1rzD1L67UQkJgEH6hPTomJvA4FaOmJ+ZSaExMy55LH
ZT+nD3zQ9SCuA0DEpfxbsCP1tbnoXSMQNt8Tyh0x8PAoxp5bI0eRczOju1QWLWTS
tjBEMZNipN6krrV9RPWT0S5Z31/yGr/sXprCSHFV9Ypzwrx58Tj2i6F9gR7FVbLs
nV2/O8taEn0sMQIz8TVHKol/TBalluGrC4M/bOeS3faP3BPN4TT24Gtc0LAKEibk
F49/SX7FzwhOdl43Bdkbe2bbL86p+zOLSf0IMBwMm0DJl4aiOljRUYTSYRolgGgm
Ebw9QhemTwbxxeD2nEriA4EAeYvTx69RDlN2eVilwwfJ48Xz9fVm3GNYG7LISeON
k3/TyZOBQH2SZ2Hc3oF2Mq9j1UPHXZHUUsUNlNcN+aM9SFHcWkRi6xZWemTJHJZ4
zFss5RZHo0+RLBa8rrx8xaO8iWrc73+FuRhr9eSsmyPIj+OZ4ezEFRRRHwtk2fgv
dZvD413AyCI1c+3LlBusESMsrtXyY8p9O9buNTzHy3ZUtHe0ERmYV2m/a83A5pXo
Kia/5aJbPIC61bAkCCkiVo+W9OASJ6o5+3CXl5sM9lGTbDXjcofzewmd+RHPestx
xVbzeR9UIw==
=bYLJ
-----END PGP SIGNATURE-----

Merge tag 'for-5.7/io_uring-2020-03-29' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Here are the io_uring changes for this merge window. Light on new
  features this time around (just splice + buffer selection), lots of
  cleanups, fixes, and improvements to existing support. In particular,
  this contains:

   - Cleanup fixed file update handling for stack fallback (Hillf)

   - Re-work of how pollable async IO is handled, we no longer require
     thread offload to handle that. Instead we rely on using poll to
     drive this, with task_work execution.

   - In conjunction with the above, allow expendable buffer selection,
     so that poll+recv (for example) no longer has to be a split
     operation.

   - Make sure we honor RLIMIT_FSIZE for buffered writes

   - Add support for splice (Pavel)

   - Linked work inheritance fixes and optimizations (Pavel)

   - Async work fixes and cleanups (Pavel)

   - Improve io-wq locking (Pavel)

   - Hashed link write improvements (Pavel)

   - SETUP_IOPOLL|SETUP_SQPOLL improvements (Xiaoguang)"

* tag 'for-5.7/io_uring-2020-03-29' of git://git.kernel.dk/linux-block: (54 commits)
  io_uring: cleanup io_alloc_async_ctx()
  io_uring: fix missing 'return' in comment
  io-wq: handle hashed writes in chains
  io-uring: drop 'free_pfile' in struct io_file_put
  io-uring: drop completion when removing file
  io_uring: Fix ->data corruption on re-enqueue
  io-wq: close cancel gap for hashed linked work
  io_uring: make spdxcheck.py happy
  io_uring: honor original task RLIMIT_FSIZE
  io-wq: hash dependent work
  io-wq: split hashing and enqueueing
  io-wq: don't resched if there is no work
  io-wq: remove duplicated cancel code
  io_uring: fix truncated async read/readv and write/writev retry
  io_uring: dual license io_uring.h uapi header
  io_uring: io_uring_enter(2) don't poll while SETUP_IOPOLL|SETUP_SQPOLL enabled
  io_uring: Fix unused function warnings
  io_uring: add end-of-bits marker and build time verify it
  io_uring: provide means of removing buffers
  io_uring: add IOSQE_BUFFER_SELECT support for IORING_OP_RECVMSG
  ...
This commit is contained in commit e59cd88028.
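The poll-driven recv and buffer-selection flow described in the pull message can be exercised from user space with liburing. This is a minimal hedged sketch, assuming a liburing release new enough to expose io_uring_prep_provide_buffers() and the sqe buf_group field (kernel 5.7+ headers); the group id, buffer count/size and the socketpair are arbitrary example choices, and error handling is trimmed:

/*
 * Sketch only: register a group of buffers, then let the kernel pick one
 * for a recv once data arrives (poll-driven, no thread offload).
 */
#include <liburing.h>
#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define GROUP_ID 1	/* arbitrary buffer group id */
#define NR_BUFS  8
#define BUF_LEN  4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char *bufs = malloc(NR_BUFS * BUF_LEN);
	int sv[2];

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	io_uring_queue_init(8, &ring, 0);

	/* hand NR_BUFS buffers of BUF_LEN bytes to the kernel under GROUP_ID */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_provide_buffers(sqe, bufs, BUF_LEN, NR_BUFS, GROUP_ID, 0);
	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);

	/* recv with no buffer supplied: the kernel selects one from GROUP_ID */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_recv(sqe, sv[0], NULL, BUF_LEN, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = GROUP_ID;
	io_uring_submit(&ring);

	write(sv[1], "hello", 5);	/* make the socket readable */

	io_uring_wait_cqe(&ring, &cqe);
	if (cqe->res >= 0 && (cqe->flags & IORING_CQE_F_BUFFER))
		printf("got %d bytes in buffer %u\n", cqe->res,
		       cqe->flags >> IORING_CQE_BUFFER_SHIFT);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	free(bufs);
	return 0;
}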
fs/io-wq.c (372 changed lines)

@@ -69,6 +69,8 @@ struct io_worker {
 #define IO_WQ_HASH_ORDER	5
 #endif
 
+#define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)
+
 struct io_wqe_acct {
 	unsigned nr_workers;
 	unsigned max_workers;
@@ -98,6 +100,7 @@ struct io_wqe {
 	struct list_head all_list;
 
 	struct io_wq *wq;
+	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
 };
 
 /*
@@ -107,8 +110,7 @@ struct io_wq {
 	struct io_wqe **wqes;
 	unsigned long state;
 
-	get_work_fn *get_work;
-	put_work_fn *put_work;
+	free_work_fn *free_work;
 
 	struct task_struct *manager;
 	struct user_struct *user;
@@ -376,26 +378,35 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
 	return __io_worker_unuse(wqe, worker);
 }
 
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
+static inline unsigned int io_get_work_hash(struct io_wq_work *work)
+{
+	return work->flags >> IO_WQ_HASH_SHIFT;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 	__must_hold(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
-	struct io_wq_work *work;
+	struct io_wq_work *work, *tail;
+	unsigned int hash;
 
 	wq_list_for_each(node, prev, &wqe->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 
 		/* not hashed, can run anytime */
-		if (!(work->flags & IO_WQ_WORK_HASHED)) {
-			wq_node_del(&wqe->work_list, node, prev);
+		if (!io_wq_is_hashed(work)) {
+			wq_list_del(&wqe->work_list, node, prev);
 			return work;
 		}
 
 		/* hashed, can run if not already running */
-		*hash = work->flags >> IO_WQ_HASH_SHIFT;
-		if (!(wqe->hash_map & BIT_ULL(*hash))) {
-			wqe->hash_map |= BIT_ULL(*hash);
-			wq_node_del(&wqe->work_list, node, prev);
+		hash = io_get_work_hash(work);
+		if (!(wqe->hash_map & BIT(hash))) {
+			wqe->hash_map |= BIT(hash);
+			/* all items with this hash lie in [work, tail] */
+			tail = wqe->hash_tail[hash];
+			wqe->hash_tail[hash] = NULL;
+			wq_list_cut(&wqe->work_list, &tail->list, prev);
 			return work;
 		}
 	}
@@ -440,16 +451,49 @@ static void io_wq_switch_creds(struct io_worker *worker,
 	worker->saved_creds = old_creds;
 }
 
+static void io_impersonate_work(struct io_worker *worker,
+				struct io_wq_work *work)
+{
+	if (work->files && current->files != work->files) {
+		task_lock(current);
+		current->files = work->files;
+		task_unlock(current);
+	}
+	if (work->fs && current->fs != work->fs)
+		current->fs = work->fs;
+	if (work->mm != worker->mm)
+		io_wq_switch_mm(worker, work);
+	if (worker->cur_creds != work->creds)
+		io_wq_switch_creds(worker, work);
+}
+
+static void io_assign_current_work(struct io_worker *worker,
+				   struct io_wq_work *work)
+{
+	if (work) {
+		/* flush pending signals before assigning new work */
+		if (signal_pending(current))
+			flush_signals(current);
+		cond_resched();
+	}
+
+	spin_lock_irq(&worker->lock);
+	worker->cur_work = work;
+	spin_unlock_irq(&worker->lock);
+}
+
+static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+
 static void io_worker_handle_work(struct io_worker *worker)
 	__releases(wqe->lock)
 {
-	struct io_wq_work *work, *old_work = NULL, *put_work = NULL;
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 
 	do {
-		unsigned hash = -1U;
-
+		struct io_wq_work *work;
+		unsigned int hash;
+get_next:
 		/*
 		 * If we got some work, mark us as busy. If we didn't, but
 		 * the list isn't empty, it means we stalled on hashed work.
@@ -457,81 +501,60 @@ static void io_worker_handle_work(struct io_worker *worker)
 		 * can't make progress, any work completion or insertion will
 		 * clear the stalled flag.
 		 */
-		work = io_get_next_work(wqe, &hash);
+		work = io_get_next_work(wqe);
 		if (work)
 			__io_worker_busy(wqe, worker, work);
 		else if (!wq_list_empty(&wqe->work_list))
 			wqe->flags |= IO_WQE_FLAG_STALLED;
 
 		spin_unlock_irq(&wqe->lock);
-		if (put_work && wq->put_work)
-			wq->put_work(old_work);
 		if (!work)
 			break;
-next:
-		/* flush any pending signals before assigning new work */
-		if (signal_pending(current))
-			flush_signals(current);
-
-		cond_resched();
-
-		spin_lock_irq(&worker->lock);
-		worker->cur_work = work;
-		spin_unlock_irq(&worker->lock);
-
-		if (work->flags & IO_WQ_WORK_CB)
-			work->func(&work);
-
-		if (work->files && current->files != work->files) {
-			task_lock(current);
-			current->files = work->files;
-			task_unlock(current);
-		}
-		if (work->fs && current->fs != work->fs)
-			current->fs = work->fs;
-		if (work->mm != worker->mm)
-			io_wq_switch_mm(worker, work);
-		if (worker->cur_creds != work->creds)
-			io_wq_switch_creds(worker, work);
-		/*
-		 * OK to set IO_WQ_WORK_CANCEL even for uncancellable work,
-		 * the worker function will do the right thing.
-		 */
-		if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
-			work->flags |= IO_WQ_WORK_CANCEL;
-		if (worker->mm)
-			work->flags |= IO_WQ_WORK_HAS_MM;
-
-		if (wq->get_work) {
-			put_work = work;
-			wq->get_work(work);
-		}
-
-		old_work = work;
-		work->func(&work);
-
-		spin_lock_irq(&worker->lock);
-		worker->cur_work = NULL;
-		spin_unlock_irq(&worker->lock);
-
-		spin_lock_irq(&wqe->lock);
-
-		if (hash != -1U) {
-			wqe->hash_map &= ~BIT_ULL(hash);
-			wqe->flags &= ~IO_WQE_FLAG_STALLED;
-		}
-		if (work && work != old_work) {
-			spin_unlock_irq(&wqe->lock);
-
-			if (put_work && wq->put_work) {
-				wq->put_work(put_work);
-				put_work = NULL;
-			}
-
-			/* dependent work not hashed */
-			hash = -1U;
-			goto next;
-		}
+		io_assign_current_work(worker, work);
+
+		/* handle a whole dependent link */
+		do {
+			struct io_wq_work *old_work, *next_hashed, *linked;
+
+			next_hashed = wq_next_work(work);
+			io_impersonate_work(worker, work);
+			/*
+			 * OK to set IO_WQ_WORK_CANCEL even for uncancellable
+			 * work, the worker function will do the right thing.
+			 */
+			if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
+				work->flags |= IO_WQ_WORK_CANCEL;
+
+			hash = io_get_work_hash(work);
+			linked = old_work = work;
+			linked->func(&linked);
+			linked = (old_work == linked) ? NULL : linked;
+
+			work = next_hashed;
+			if (!work && linked && !io_wq_is_hashed(linked)) {
+				work = linked;
+				linked = NULL;
+			}
+			io_assign_current_work(worker, work);
+			wq->free_work(old_work);
+
+			if (linked)
+				io_wqe_enqueue(wqe, linked);
+
+			if (hash != -1U && !next_hashed) {
+				spin_lock_irq(&wqe->lock);
+				wqe->hash_map &= ~BIT_ULL(hash);
+				wqe->flags &= ~IO_WQE_FLAG_STALLED;
+				/* dependent work is not hashed */
+				hash = -1U;
+				/* skip unnecessary unlock-lock wqe->lock */
+				if (!work)
+					goto get_next;
+				spin_unlock_irq(&wqe->lock);
+			}
+		} while (work);
+
+		spin_lock_irq(&wqe->lock);
 	} while (1);
 }
@@ -747,17 +770,40 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 	return true;
 }
 
-static void io_run_cancel(struct io_wq_work *work)
+static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
 {
+	struct io_wq *wq = wqe->wq;
+
 	do {
 		struct io_wq_work *old_work = work;
 
 		work->flags |= IO_WQ_WORK_CANCEL;
 		work->func(&work);
 		work = (work == old_work) ? NULL : work;
+		wq->free_work(old_work);
 	} while (work);
 }
 
+static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+{
+	unsigned int hash;
+	struct io_wq_work *tail;
+
+	if (!io_wq_is_hashed(work)) {
+append:
+		wq_list_add_tail(&work->list, &wqe->work_list);
+		return;
+	}
+
+	hash = io_get_work_hash(work);
+	tail = wqe->hash_tail[hash];
+	wqe->hash_tail[hash] = work;
+	if (!tail)
+		goto append;
+
+	wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
+}
+
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -771,13 +817,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 	 * It's close enough to not be an issue, fork() has the same delay.
 	 */
 	if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
-		io_run_cancel(work);
+		io_run_cancel(work, wqe);
 		return;
 	}
 
 	work_flags = work->flags;
 	spin_lock_irqsave(&wqe->lock, flags);
-	wq_list_add_tail(&work->list, &wqe->work_list);
+	io_wqe_insert_work(wqe, work);
 	wqe->flags &= ~IO_WQE_FLAG_STALLED;
 	spin_unlock_irqrestore(&wqe->lock, flags);
 
@@ -794,19 +840,15 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 }
 
 /*
- * Enqueue work, hashed by some key. Work items that hash to the same value
- * will not be done in parallel. Used to limit concurrent writes, generally
- * hashed by inode.
+ * Work items that hash to the same value will not be done in parallel.
+ * Used to limit concurrent writes, generally hashed by inode.
  */
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val)
+void io_wq_hash_work(struct io_wq_work *work, void *val)
 {
-	struct io_wqe *wqe = wq->wqes[numa_node_id()];
-	unsigned bit;
+	unsigned int bit;
 
 	bit = hash_ptr(val, IO_WQ_HASH_ORDER);
 	work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
-	io_wqe_enqueue(wqe, work);
 }
 
 static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
@@ -856,14 +898,13 @@ void io_wq_cancel_all(struct io_wq *wq)
 }
 
 struct io_cb_cancel_data {
-	struct io_wqe *wqe;
-	work_cancel_fn *cancel;
-	void *caller_data;
+	work_cancel_fn *fn;
+	void *data;
 };
 
-static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
+static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
-	struct io_cb_cancel_data *data = cancel_data;
+	struct io_cb_cancel_data *match = data;
 	unsigned long flags;
 	bool ret = false;
 
@@ -874,83 +915,7 @@ static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
 	spin_lock_irqsave(&worker->lock, flags);
 	if (worker->cur_work &&
 	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
-	    data->cancel(worker->cur_work, data->caller_data)) {
-		send_sig(SIGINT, worker->task, 1);
-		ret = true;
-	}
-	spin_unlock_irqrestore(&worker->lock, flags);
-
-	return ret;
-}
-
-static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
-					       work_cancel_fn *cancel,
-					       void *cancel_data)
-{
-	struct io_cb_cancel_data data = {
-		.wqe = wqe,
-		.cancel = cancel,
-		.caller_data = cancel_data,
-	};
-	struct io_wq_work_node *node, *prev;
-	struct io_wq_work *work;
-	unsigned long flags;
-	bool found = false;
-
-	spin_lock_irqsave(&wqe->lock, flags);
-	wq_list_for_each(node, prev, &wqe->work_list) {
-		work = container_of(node, struct io_wq_work, list);
-
-		if (cancel(work, cancel_data)) {
-			wq_node_del(&wqe->work_list, node, prev);
-			found = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&wqe->lock, flags);
-
-	if (found) {
-		io_run_cancel(work);
-		return IO_WQ_CANCEL_OK;
-	}
-
-	rcu_read_lock();
-	found = io_wq_for_each_worker(wqe, io_work_cancel, &data);
-	rcu_read_unlock();
-	return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
-}
-
-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
-				  void *data)
-{
-	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
-	int node;
-
-	for_each_node(node) {
-		struct io_wqe *wqe = wq->wqes[node];
-
-		ret = io_wqe_cancel_cb_work(wqe, cancel, data);
-		if (ret != IO_WQ_CANCEL_NOTFOUND)
-			break;
-	}
-
-	return ret;
-}
-
-struct work_match {
-	bool (*fn)(struct io_wq_work *, void *data);
-	void *data;
-};
-
-static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
-{
-	struct work_match *match = data;
-	unsigned long flags;
-	bool ret = false;
-
-	spin_lock_irqsave(&worker->lock, flags);
-	if (match->fn(worker->cur_work, match->data) &&
-	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
+	    match->fn(worker->cur_work, match->data)) {
 		send_sig(SIGINT, worker->task, 1);
 		ret = true;
 	}
@@ -960,7 +925,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 }
 
 static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
-					    struct work_match *match)
+					    struct io_cb_cancel_data *match)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
@@ -977,7 +942,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 		work = container_of(node, struct io_wq_work, list);
 
 		if (match->fn(work, match->data)) {
-			wq_node_del(&wqe->work_list, node, prev);
+			wq_list_del(&wqe->work_list, node, prev);
 			found = true;
 			break;
 		}
@@ -985,7 +950,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	spin_unlock_irqrestore(&wqe->lock, flags);
 
 	if (found) {
-		io_run_cancel(work);
+		io_run_cancel(work, wqe);
 		return IO_WQ_CANCEL_OK;
 	}
 
@@ -1001,60 +966,49 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
 }
 
-static bool io_wq_work_match(struct io_wq_work *work, void *data)
+enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+				  void *data)
+{
+	struct io_cb_cancel_data match = {
+		.fn = cancel,
+		.data = data,
+	};
+	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
+	int node;
+
+	for_each_node(node) {
+		struct io_wqe *wqe = wq->wqes[node];
+
+		ret = io_wqe_cancel_work(wqe, &match);
+		if (ret != IO_WQ_CANCEL_NOTFOUND)
+			break;
+	}
+
+	return ret;
+}
+
+static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
 {
 	return work == data;
 }
 
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
 {
-	struct work_match match = {
-		.fn = io_wq_work_match,
-		.data = cwork
-	};
-	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
-	int node;
-
-	cwork->flags |= IO_WQ_WORK_CANCEL;
-
-	for_each_node(node) {
-		struct io_wqe *wqe = wq->wqes[node];
-
-		ret = io_wqe_cancel_work(wqe, &match);
-		if (ret != IO_WQ_CANCEL_NOTFOUND)
-			break;
-	}
-
-	return ret;
+	return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork);
 }
 
 static bool io_wq_pid_match(struct io_wq_work *work, void *data)
 {
 	pid_t pid = (pid_t) (unsigned long) data;
 
-	if (work)
-		return work->task_pid == pid;
-
-	return false;
+	return work->task_pid == pid;
 }
 
 enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
 {
-	struct work_match match = {
-		.fn = io_wq_pid_match,
-		.data = (void *) (unsigned long) pid
-	};
-	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
-	int node;
-
-	for_each_node(node) {
-		struct io_wqe *wqe = wq->wqes[node];
-
-		ret = io_wqe_cancel_work(wqe, &match);
-		if (ret != IO_WQ_CANCEL_NOTFOUND)
-			break;
-	}
-
-	return ret;
+	void *data = (void *) (unsigned long) pid;
+
+	return io_wq_cancel_cb(wq, io_wq_pid_match, data);
 }
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
@@ -1062,6 +1016,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	int ret = -ENOMEM, node;
 	struct io_wq *wq;
 
+	if (WARN_ON_ONCE(!data->free_work))
+		return ERR_PTR(-EINVAL);
+
 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
 		return ERR_PTR(-ENOMEM);
@@ -1072,8 +1029,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	wq->get_work = data->get_work;
-	wq->put_work = data->put_work;
+	wq->free_work = data->free_work;
 
 	/* caller must already hold a reference to this */
 	wq->user = data->user;
@@ -1130,7 +1086,7 @@ err:
 
 bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
 {
-	if (data->get_work != wq->get_work || data->put_work != wq->put_work)
+	if (data->free_work != wq->free_work)
 		return false;
 
 	return refcount_inc_not_zero(&wq->use_refs);
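The comment in the hunk above ("Used to limit concurrent writes, generally hashed by inode") is what the chained-hash machinery serves: buffered writes that get punted to io-wq are keyed by inode, so writes to one file run in order on a single worker while writes to other files proceed in parallel. A hedged user-space sketch that would exercise that path with liburing (hypothetical file name, error handling trimmed; whether the requests are actually punted to io-wq depends on the file system and page cache state):

/* Two buffered writes to the same file submitted together: if punted to
 * io-wq they hash to the same inode and are chained, not run in parallel. */
#include <liburing.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	char a[4096], b[4096];
	int fd, i;

	memset(a, 'a', sizeof(a));
	memset(b, 'b', sizeof(b));
	fd = open("testfile", O_WRONLY | O_CREAT, 0644);	/* example path */

	io_uring_queue_init(8, &ring, 0);
	io_uring_prep_write(io_uring_get_sqe(&ring), fd, a, sizeof(a), 0);
	io_uring_prep_write(io_uring_get_sqe(&ring), fd, b, sizeof(b), 4096);
	io_uring_submit(&ring);

	for (i = 0; i < 2; i++) {	/* reap both completions */
		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}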
fs/io-wq.h (65 changed lines)

@@ -5,10 +5,8 @@ struct io_wq;
 
 enum {
 	IO_WQ_WORK_CANCEL	= 1,
-	IO_WQ_WORK_HAS_MM	= 2,
 	IO_WQ_WORK_HASHED	= 4,
 	IO_WQ_WORK_UNBOUND	= 32,
-	IO_WQ_WORK_CB		= 128,
 	IO_WQ_WORK_NO_CANCEL	= 256,
 	IO_WQ_WORK_CONCURRENT	= 512,
 
@@ -30,6 +28,18 @@ struct io_wq_work_list {
 	struct io_wq_work_node *last;
 };
 
+static inline void wq_list_add_after(struct io_wq_work_node *node,
+				     struct io_wq_work_node *pos,
+				     struct io_wq_work_list *list)
+{
+	struct io_wq_work_node *next = pos->next;
+
+	pos->next = node;
+	node->next = next;
+	if (!next)
+		list->last = node;
+}
+
 static inline void wq_list_add_tail(struct io_wq_work_node *node,
 				    struct io_wq_work_list *list)
 {
@@ -42,17 +52,26 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
 	}
 }
 
-static inline void wq_node_del(struct io_wq_work_list *list,
+static inline void wq_list_cut(struct io_wq_work_list *list,
+			       struct io_wq_work_node *last,
+			       struct io_wq_work_node *prev)
+{
+	/* first in the list, if prev==NULL */
+	if (!prev)
+		WRITE_ONCE(list->first, last->next);
+	else
+		prev->next = last->next;
+
+	if (last == list->last)
+		list->last = prev;
+	last->next = NULL;
+}
+
+static inline void wq_list_del(struct io_wq_work_list *list,
 			       struct io_wq_work_node *node,
 			       struct io_wq_work_node *prev)
 {
-	if (node == list->first)
-		WRITE_ONCE(list->first, node->next);
-	if (node == list->last)
-		list->last = prev;
-	if (prev)
-		prev->next = node->next;
-	node->next = NULL;
+	wq_list_cut(list, node, prev);
 }
 
 #define wq_list_for_each(pos, prv, head) \
@@ -65,10 +84,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
 } while (0)
 
 struct io_wq_work {
-	union {
-		struct io_wq_work_node list;
-		void *data;
-	};
+	struct io_wq_work_node list;
 	void (*func)(struct io_wq_work **);
 	struct files_struct *files;
 	struct mm_struct *mm;
@@ -83,14 +99,20 @@ struct io_wq_work {
 	*(work) = (struct io_wq_work){ .func = _func };	\
 } while (0) \
 
-typedef void (get_work_fn)(struct io_wq_work *);
-typedef void (put_work_fn)(struct io_wq_work *);
+static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
+{
+	if (!work->list.next)
+		return NULL;
+
+	return container_of(work->list.next, struct io_wq_work, list);
+}
+
+typedef void (free_work_fn)(struct io_wq_work *);
 
 struct io_wq_data {
 	struct user_struct *user;
 
-	get_work_fn *get_work;
-	put_work_fn *put_work;
+	free_work_fn *free_work;
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
@@ -98,7 +120,12 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
 void io_wq_destroy(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
-void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
+void io_wq_hash_work(struct io_wq_work *work, void *val);
+
+static inline bool io_wq_is_hashed(struct io_wq_work *work)
+{
+	return work->flags & IO_WQ_WORK_HASHED;
+}
 
 void io_wq_cancel_all(struct io_wq *wq);
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
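The two list helpers introduced above, wq_list_add_after() and wq_list_cut(), are small enough to check in isolation. Below is a stand-alone user-space model with the same logic (kernel types, WRITE_ONCE() and locking dropped, names simplified) showing that cutting a chained [first..last] span out of the singly linked list is a constant-time operation, which is what lets a worker pull a whole hash bucket's run of work at once:

/* Toy model of the wq_list_add_after()/wq_list_cut() semantics above. */
#include <stdio.h>
#include <stddef.h>

struct node { struct node *next; int id; };
struct list { struct node *first, *last; };

static void list_add_tail(struct node *n, struct list *l)
{
	n->next = NULL;
	if (!l->first)
		l->first = n;
	else
		l->last->next = n;
	l->last = n;
}

static void list_add_after(struct node *n, struct node *pos, struct list *l)
{
	struct node *next = pos->next;

	pos->next = n;
	n->next = next;
	if (!next)
		l->last = n;
}

/* detach everything from prev->next up to and including 'last' */
static void list_cut(struct list *l, struct node *last, struct node *prev)
{
	if (!prev)
		l->first = last->next;
	else
		prev->next = last->next;
	if (last == l->last)
		l->last = prev;
	last->next = NULL;
}

int main(void)
{
	struct node n[4] = { {0, 0}, {0, 1}, {0, 2}, {0, 3} };
	struct list l = { NULL, NULL };
	struct node *p;

	list_add_tail(&n[0], &l);		/* 0 */
	list_add_tail(&n[3], &l);		/* 0 3 */
	list_add_after(&n[1], &n[0], &l);	/* 0 1 3: chain behind 0 */
	list_add_after(&n[2], &n[1], &l);	/* 0 1 2 3: same "bucket" */

	list_cut(&l, &n[2], NULL);		/* take the whole 0..2 span */

	for (p = l.first; p; p = p->next)	/* prints: 3 */
		printf("%d\n", p->id);
	return 0;
}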
fs/io_uring.c (2027 changed lines) — diff suppressed because it is too large.
fs/splice.c

@@ -1109,9 +1109,9 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 /*
  * Determine where to splice to/from.
  */
-static long do_splice(struct file *in, loff_t __user *off_in,
+long do_splice(struct file *in, loff_t __user *off_in,
 		      struct file *out, loff_t __user *off_out,
 		      size_t len, unsigned int flags)
 {
 	struct pipe_inode_info *ipipe;
 	struct pipe_inode_info *opipe;
include/linux/socket.h

@@ -391,6 +391,10 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg,
 			       struct user_msghdr __user *umsg, unsigned flags,
 			       struct sockaddr __user **uaddr,
 			       struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+				   struct user_msghdr __user *umsg,
+				   struct sockaddr __user **save_addr,
+				   struct iovec __user **uiov, size_t *nsegs);
 
 /* helpers which do the actual work for syscalls */
 extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
include/linux/splice.h

@@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
 			   struct pipe_buffer *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
 				      splice_direct_actor *);
+extern long do_splice(struct file *in, loff_t __user *off_in,
+		      struct file *out, loff_t __user *off_out,
+		      size_t len, unsigned int flags);
 
 /*
  * for dynamic pipe sizing
include/net/compat.h

@@ -38,6 +38,9 @@ struct compat_cmsghdr {
 #define compat_mmsghdr	mmsghdr
 #endif /* defined(CONFIG_COMPAT) */
 
+int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg,
+			struct sockaddr __user **save_addr, compat_uptr_t *ptr,
+			compat_size_t *len);
 int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
 		      struct sockaddr __user **, struct iovec **);
 struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval);
include/trace/events/io_uring.h

@@ -357,6 +357,109 @@ TRACE_EVENT(io_uring_submit_sqe,
 		  __entry->force_nonblock, __entry->sq_thread)
 );
 
+TRACE_EVENT(io_uring_poll_arm,
+
+	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+
+	TP_ARGS(ctx, opcode, user_data, mask, events),
+
+	TP_STRUCT__entry (
+		__field( void *, ctx )
+		__field( u8, opcode )
+		__field( u64, user_data )
+		__field( int, mask )
+		__field( int, events )
+	),
+
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->opcode = opcode;
+		__entry->user_data = user_data;
+		__entry->mask = mask;
+		__entry->events = events;
+	),
+
+	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->opcode,
+		  (unsigned long long) __entry->user_data,
+		  __entry->mask, __entry->events)
+);
+
+TRACE_EVENT(io_uring_poll_wake,
+
+	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+	TP_ARGS(ctx, opcode, user_data, mask),
+
+	TP_STRUCT__entry (
+		__field( void *, ctx )
+		__field( u8, opcode )
+		__field( u64, user_data )
+		__field( int, mask )
+	),
+
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->opcode = opcode;
+		__entry->user_data = user_data;
+		__entry->mask = mask;
+	),
+
+	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
+		  __entry->ctx, __entry->opcode,
+		  (unsigned long long) __entry->user_data,
+		  __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_add,
+
+	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
+
+	TP_ARGS(ctx, opcode, user_data, mask),
+
+	TP_STRUCT__entry (
+		__field( void *, ctx )
+		__field( u8, opcode )
+		__field( u64, user_data )
+		__field( int, mask )
+	),
+
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->opcode = opcode;
+		__entry->user_data = user_data;
+		__entry->mask = mask;
+	),
+
+	TP_printk("ring %p, op %d, data 0x%llx, mask %x",
+		  __entry->ctx, __entry->opcode,
+		  (unsigned long long) __entry->user_data,
+		  __entry->mask)
+);
+
+TRACE_EVENT(io_uring_task_run,
+
+	TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+
+	TP_ARGS(ctx, opcode, user_data),
+
+	TP_STRUCT__entry (
+		__field( void *, ctx )
+		__field( u8, opcode )
+		__field( u64, user_data )
+	),
+
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->opcode = opcode;
+		__entry->user_data = user_data;
+	),
+
+	TP_printk("ring %p, op %d, data 0x%llx",
+		  __entry->ctx, __entry->opcode,
+		  (unsigned long long) __entry->user_data)
+);
+
 #endif /* _TRACE_IO_URING_H */
 
 /* This part must be outside protection */
include/uapi/linux/io_uring.h

@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
 /*
  * Header file for the io_uring interface.
  *
@@ -23,7 +23,10 @@ struct io_uring_sqe {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
 	};
-	__u64	addr;	/* pointer to buffer or iovecs */
+	union {
+		__u64	addr;	/* pointer to buffer or iovecs */
+		__u64	splice_off_in;
+	};
 	__u32	len;	/* buffer size or number of iovecs */
 	union {
 		__kernel_rwf_t	rw_flags;
@@ -37,14 +40,21 @@ struct io_uring_sqe {
 		__u32		open_flags;
 		__u32		statx_flags;
 		__u32		fadvise_advice;
+		__u32		splice_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
 		struct {
-			/* index into fixed buffers, if used */
-			__u16	buf_index;
+			/* pack this to avoid bogus arm OABI complaints */
+			union {
+				/* index into fixed buffers, if used */
+				__u16	buf_index;
+				/* for grouped buffer selection */
+				__u16	buf_group;
+			} __attribute__((packed));
 			/* personality to use, if used */
 			__u16	personality;
+			__s32	splice_fd_in;
 		};
 		__u64	__pad2[3];
 	};
@@ -56,6 +66,7 @@ enum {
 	IOSQE_IO_LINK_BIT,
 	IOSQE_IO_HARDLINK_BIT,
 	IOSQE_ASYNC_BIT,
+	IOSQE_BUFFER_SELECT_BIT,
 };
 
 /*
@@ -71,6 +82,8 @@ enum {
 #define IOSQE_IO_HARDLINK	(1U << IOSQE_IO_HARDLINK_BIT)
 /* always go async */
 #define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
 
 /*
  * io_uring_setup() flags
@@ -113,6 +126,9 @@ enum {
 	IORING_OP_RECV,
 	IORING_OP_OPENAT2,
 	IORING_OP_EPOLL_CTL,
+	IORING_OP_SPLICE,
+	IORING_OP_PROVIDE_BUFFERS,
+	IORING_OP_REMOVE_BUFFERS,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -128,6 +144,12 @@ enum {
  */
 #define IORING_TIMEOUT_ABS	(1U << 0)
 
+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
@@ -137,6 +159,17 @@ struct io_uring_cqe {
 	__u32	flags;
 };
 
+/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
+ */
+#define IORING_CQE_F_BUFFER	(1U << 0)
+
+enum {
+	IORING_CQE_BUFFER_SHIFT	= 16,
+};
+
 /*
  * Magic offsets for the application to mmap the data it needs
  */
@@ -204,6 +237,7 @@ struct io_uring_params {
 #define IORING_FEAT_SUBMIT_STABLE	(1U << 2)
 #define IORING_FEAT_RW_CUR_POS		(1U << 3)
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
+#define IORING_FEAT_FAST_POLL		(1U << 5)
 
 /*
  * io_uring_register(2) opcodes and arguments
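The new IORING_OP_SPLICE opcode and the splice_off_in/splice_fd_in/splice_flags fields above can be driven from user space roughly as follows. This is a hedged sketch assuming a liburing release that tracks this kernel and provides io_uring_prep_splice(); the pipe/file descriptors and the 4096-byte count are arbitrary, SPLICE_F_FD_IN_FIXED would only be passed if fd_in referred to a registered file, and error handling is trimmed:

/* Sketch: splice up to 4096 bytes from a pipe into a file via io_uring. */
#include <liburing.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int pipefd[2], filefd;

	pipe(pipefd);
	filefd = open("out.dat", O_WRONLY | O_CREAT, 0644);	/* example path */
	write(pipefd[1], "hello io_uring splice\n", 22);

	io_uring_queue_init(4, &ring, 0);

	sqe = io_uring_get_sqe(&ring);
	/* off_in = -1: consume from the pipe; off_out = 0: write at file start */
	io_uring_prep_splice(sqe, pipefd[0], -1, filefd, 0, 4096, 0);
	io_uring_submit(&ring);

	io_uring_wait_cqe(&ring, &cqe);
	/* cqe->res is the number of bytes spliced, or -errno on failure */
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	close(filefd);
	return 0;
}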
kernel/task_work.c

@@ -97,16 +97,26 @@ void task_work_run(void)
 		 * work->func() can do task_work_add(), do not set
 		 * work_exited unless the list is empty.
 		 */
-		raw_spin_lock_irq(&task->pi_lock);
 		do {
+			head = NULL;
 			work = READ_ONCE(task->task_works);
-			head = !work && (task->flags & PF_EXITING) ?
-				&work_exited : NULL;
+			if (!work) {
+				if (task->flags & PF_EXITING)
+					head = &work_exited;
+				else
+					break;
+			}
 		} while (cmpxchg(&task->task_works, work, head) != work);
-		raw_spin_unlock_irq(&task->pi_lock);
 
 		if (!work)
 			break;
+		/*
+		 * Synchronize with task_work_cancel(). It can not remove
+		 * the first entry == work, cmpxchg(task_works) must fail.
+		 * But it can remove another entry from the ->next list.
+		 */
+		raw_spin_lock_irq(&task->pi_lock);
+		raw_spin_unlock_irq(&task->pi_lock);
 
 		do {
 			next = work->next;
net/compat.c (30 changed lines)

@@ -33,10 +33,10 @@
 #include <linux/uaccess.h>
 #include <net/compat.h>
 
-int get_compat_msghdr(struct msghdr *kmsg,
-		      struct compat_msghdr __user *umsg,
-		      struct sockaddr __user **save_addr,
-		      struct iovec **iov)
+int __get_compat_msghdr(struct msghdr *kmsg,
+			struct compat_msghdr __user *umsg,
+			struct sockaddr __user **save_addr,
+			compat_uptr_t *ptr, compat_size_t *len)
 {
 	struct compat_msghdr msg;
 	ssize_t err;
@@ -79,10 +79,26 @@ int get_compat_msghdr(struct msghdr *kmsg,
 		return -EMSGSIZE;
 
 	kmsg->msg_iocb = NULL;
+	*ptr = msg.msg_iov;
+	*len = msg.msg_iovlen;
+	return 0;
+}
+
+int get_compat_msghdr(struct msghdr *kmsg,
+		      struct compat_msghdr __user *umsg,
+		      struct sockaddr __user **save_addr,
+		      struct iovec **iov)
+{
+	compat_uptr_t ptr;
+	compat_size_t len;
+	ssize_t err;
+
+	err = __get_compat_msghdr(kmsg, umsg, save_addr, &ptr, &len);
+	if (err)
+		return err;
 
-	err = compat_import_iovec(save_addr ? READ : WRITE,
-				  compat_ptr(msg.msg_iov), msg.msg_iovlen,
-				  UIO_FASTIOV, iov, &kmsg->msg_iter);
+	err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
+				  len, UIO_FASTIOV, iov, &kmsg->msg_iter);
 	return err < 0 ? err : 0;
 }
 
net/socket.c (25 changed lines)

@@ -2228,10 +2228,10 @@ struct used_address {
 	unsigned int name_len;
 };
 
-static int copy_msghdr_from_user(struct msghdr *kmsg,
-				 struct user_msghdr __user *umsg,
-				 struct sockaddr __user **save_addr,
-				 struct iovec **iov)
+int __copy_msghdr_from_user(struct msghdr *kmsg,
+			    struct user_msghdr __user *umsg,
+			    struct sockaddr __user **save_addr,
+			    struct iovec __user **uiov, size_t *nsegs)
 {
 	struct user_msghdr msg;
 	ssize_t err;
@@ -2273,6 +2273,23 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 		return -EMSGSIZE;
 
 	kmsg->msg_iocb = NULL;
+	*uiov = msg.msg_iov;
+	*nsegs = msg.msg_iovlen;
+	return 0;
+}
+
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+				 struct user_msghdr __user *umsg,
+				 struct sockaddr __user **save_addr,
+				 struct iovec **iov)
+{
+	struct user_msghdr msg;
+	ssize_t err;
+
+	err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
+				      &msg.msg_iovlen);
+	if (err)
+		return err;
 
 	err = import_iovec(save_addr ? READ : WRITE,
 			   msg.msg_iov, msg.msg_iovlen,