for-5.15/io_uring-2021-08-30
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmEs7tIQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpoR2EACdOPj0tivXWufgFnQyQYPpX4/Qe3lNw608
MLJ9/zshPFe5kx+SnHzT3UucHd2LO4C68pNVEHdHoi1gqMnMe9+Du82LJlo+Cx9i
53yiWxNiY7er3t3lC2nvaPF0BPKiaUKaRzqugOTfdWmKLP3MyYyygEBrRDkaK1S1
BjVq2ewmKTYf63gHkluHeRTav9KxcLWvSqgFUC8Y0mNhazTSdzGB6MAPFodpsuj1
Vv8ytCiagp9Gi0AibvS2mZvV/WxQtFP7qBofbnG3KcgKgOU+XKTJCH+cU6E/3J9Q
nPy1loh2TISOtYAz2scypAbwsK4FWeAHg1SaIj/RtUGKG7zpVU5u97CPZ8+UfHzu
CuR3a1o36Cck8+ZdZIjtRZfvQGog0Dh5u4ZQ4dRwFQd6FiVxdO8LHkegqkV6PStc
dVrHSo5kUE5hGT8ed1YFfuOSJDZ6w0/LleZGMdU4pRGGs8wqkerZlfLL4ustSfLk
AcS+azmG2f3iI5iadnxOUNeOT2lmE84fvvLyH2krfsA3AtX0CtHXQcLYAguRAIwg
gnvYf70JOya6Lb/hbXUi8d8h3uXxeFsoitz1QyasqAEoJoY05EW+l94NbiEzXwol
mKrVfZCk+wuhw3npbVCqK7nkepvBWso1qTip//T0HDAaKaZnZcmhobUn5SwI+QPx
fcgAo6iw8g==
=WBzF
-----END PGP SIGNATURE-----

Merge tag 'for-5.15/io_uring-2021-08-30' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:

 - cancellation cleanups (Hao, Pavel)
 - io-wq accounting cleanup (Hao)
 - io_uring submit locking fix (Hao)
 - io_uring link handling fixes (Hao)
 - fixed file improvements (wangyangbo, Pavel)
 - allow updates of linked timeouts like regular timeouts (Pavel)
 - IOPOLL fix (Pavel)
 - remove batched file get optimization (Pavel)
 - improve reference handling (Pavel)
 - IRQ task_work batching (Pavel)
 - allow pure fixed file, and add support for open/accept (Pavel)
 - GFP_ATOMIC RT kernel fix
 - multiple CQ ring waiter improvement
 - funnel IRQ completions through task_work
 - add support for limiting async workers explicitly
 - add different clocksource support for timeouts
 - io-wq wakeup race fix
 - lots of cleanups and improvement (Pavel et al)

* tag 'for-5.15/io_uring-2021-08-30' of git://git.kernel.dk/linux-block: (87 commits)
  io-wq: fix wakeup race when adding new work
  io-wq: wqe and worker locks no longer need to be IRQ safe
  io-wq: check max_worker limits if a worker transitions bound state
  io_uring: allow updating linked timeouts
  io_uring: keep ltimeouts in a list
  io_uring: support CLOCK_BOOTTIME/REALTIME for timeouts
  io-wq: provide a way to limit max number of workers
  io_uring: add build check for buf_index overflows
  io_uring: clarify io_req_task_cancel() locking
  io_uring: add task-refs-get helper
  io_uring: fix failed linkchain code logic
  io_uring: remove redundant req_set_fail()
  io_uring: don't free request to slab
  io_uring: accept directly into fixed file table
  io_uring: hand code io_accept() fd installing
  io_uring: openat directly into fixed fd table
  net: add accept helper not installing fd
  io_uring: fix io_try_cancel_userdata race for iowq
  io_uring: IRQ rw completion batching
  io_uring: batch task work locking
  ...
commit c547d89a9a

fs/io-wq.c (206 lines changed)
@@ -51,6 +51,10 @@ struct io_worker {

         struct completion ref_done;

+        unsigned long create_state;
+        struct callback_head create_work;
+        int create_index;
+
         struct rcu_head rcu;
 };

@@ -174,7 +178,7 @@ static void io_worker_exit(struct io_worker *worker)
         complete(&worker->ref_done);
         wait_for_completion(&worker->ref_done);

-        raw_spin_lock_irq(&wqe->lock);
+        raw_spin_lock(&wqe->lock);
         if (worker->flags & IO_WORKER_F_FREE)
                 hlist_nulls_del_rcu(&worker->nulls_node);
         list_del_rcu(&worker->all_list);
@@ -184,7 +188,7 @@ static void io_worker_exit(struct io_worker *worker)
         worker->flags = 0;
         current->flags &= ~PF_IO_WORKER;
         preempt_enable();
-        raw_spin_unlock_irq(&wqe->lock);
+        raw_spin_unlock(&wqe->lock);

         kfree_rcu(worker, rcu);
         io_worker_ref_put(wqe->wq);
@@ -250,19 +254,20 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
         if (!ret) {
                 bool do_create = false, first = false;

-                raw_spin_lock_irq(&wqe->lock);
+                raw_spin_lock(&wqe->lock);
                 if (acct->nr_workers < acct->max_workers) {
-                        atomic_inc(&acct->nr_running);
-                        atomic_inc(&wqe->wq->worker_refs);
                         if (!acct->nr_workers)
                                 first = true;
                         acct->nr_workers++;
                         do_create = true;
                 }
-                raw_spin_unlock_irq(&wqe->lock);
-                if (do_create)
+                raw_spin_unlock(&wqe->lock);
+                if (do_create) {
+                        atomic_inc(&acct->nr_running);
+                        atomic_inc(&wqe->wq->worker_refs);
                         create_io_worker(wqe->wq, wqe, acct->index, first);
+                }
         }
 }

 static void io_wqe_inc_running(struct io_worker *worker)
@@ -272,60 +277,63 @@ static void io_wqe_inc_running(struct io_worker *worker)
         atomic_inc(&acct->nr_running);
 }

-struct create_worker_data {
-        struct callback_head work;
-        struct io_wqe *wqe;
-        int index;
-};
-
 static void create_worker_cb(struct callback_head *cb)
 {
-        struct create_worker_data *cwd;
+        struct io_worker *worker;
         struct io_wq *wq;
         struct io_wqe *wqe;
         struct io_wqe_acct *acct;
         bool do_create = false, first = false;

-        cwd = container_of(cb, struct create_worker_data, work);
-        wqe = cwd->wqe;
+        worker = container_of(cb, struct io_worker, create_work);
+        wqe = worker->wqe;
         wq = wqe->wq;
-        acct = &wqe->acct[cwd->index];
-        raw_spin_lock_irq(&wqe->lock);
+        acct = &wqe->acct[worker->create_index];
+        raw_spin_lock(&wqe->lock);
         if (acct->nr_workers < acct->max_workers) {
                 if (!acct->nr_workers)
                         first = true;
                 acct->nr_workers++;
                 do_create = true;
         }
-        raw_spin_unlock_irq(&wqe->lock);
+        raw_spin_unlock(&wqe->lock);
         if (do_create) {
-                create_io_worker(wq, wqe, cwd->index, first);
+                create_io_worker(wq, wqe, worker->create_index, first);
         } else {
                 atomic_dec(&acct->nr_running);
                 io_worker_ref_put(wq);
         }
-        kfree(cwd);
+        clear_bit_unlock(0, &worker->create_state);
+        io_worker_release(worker);
 }

-static void io_queue_worker_create(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
+                                   struct io_wqe_acct *acct)
 {
-        struct create_worker_data *cwd;
         struct io_wq *wq = wqe->wq;

         /* raced with exit, just ignore create call */
         if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
                 goto fail;
+        if (!io_worker_get(worker))
+                goto fail;
+        /*
+         * create_state manages ownership of create_work/index. We should
+         * only need one entry per worker, as the worker going to sleep
+         * will trigger the condition, and waking will clear it once it
+         * runs the task_work.
+         */
+        if (test_bit(0, &worker->create_state) ||
+            test_and_set_bit_lock(0, &worker->create_state))
+                goto fail_release;

-        cwd = kmalloc(sizeof(*cwd), GFP_ATOMIC);
-        if (cwd) {
-                init_task_work(&cwd->work, create_worker_cb);
-                cwd->wqe = wqe;
-                cwd->index = acct->index;
-                if (!task_work_add(wq->task, &cwd->work, TWA_SIGNAL))
-                        return;
-
-                kfree(cwd);
-        }
+        init_task_work(&worker->create_work, create_worker_cb);
+        worker->create_index = acct->index;
+        if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
+                return;
+        clear_bit_unlock(0, &worker->create_state);
+fail_release:
+        io_worker_release(worker);
 fail:
         atomic_dec(&acct->nr_running);
         io_worker_ref_put(wq);
@@ -343,7 +351,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
         if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
                 atomic_inc(&acct->nr_running);
                 atomic_inc(&wqe->wq->worker_refs);
-                io_queue_worker_create(wqe, acct);
+                io_queue_worker_create(wqe, worker, acct);
         }
 }

@@ -416,7 +424,28 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
         spin_unlock(&wq->hash->wait.lock);
 }

-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
+/*
+ * We can always run the work if the worker is currently the same type as
+ * the work (eg both are bound, or both are unbound). If they are not the
+ * same, only allow it if incrementing the worker count would be allowed.
+ */
+static bool io_worker_can_run_work(struct io_worker *worker,
+                                   struct io_wq_work *work)
+{
+        struct io_wqe_acct *acct;
+
+        if (!(worker->flags & IO_WORKER_F_BOUND) !=
+            !(work->flags & IO_WQ_WORK_UNBOUND))
+                return true;
+
+        /* not the same type, check if we'd go over the limit */
+        acct = io_work_get_acct(worker->wqe, work);
+        return acct->nr_workers < acct->max_workers;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+                                           struct io_worker *worker,
+                                           bool *stalled)
         __must_hold(wqe->lock)
 {
         struct io_wq_work_node *node, *prev;
@@ -428,6 +457,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)

                 work = container_of(node, struct io_wq_work, list);

+                if (!io_worker_can_run_work(worker, work))
+                        break;
+
                 /* not hashed, can run anytime */
                 if (!io_wq_is_hashed(work)) {
                         wq_list_del(&wqe->work_list, node, prev);
@@ -454,6 +486,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
                 raw_spin_unlock(&wqe->lock);
                 io_wait_on_hash(wqe, stall_hash);
                 raw_spin_lock(&wqe->lock);
+                *stalled = true;
         }

         return NULL;
@@ -477,9 +510,9 @@ static void io_assign_current_work(struct io_worker *worker,
                 cond_resched();
         }

-        spin_lock_irq(&worker->lock);
+        spin_lock(&worker->lock);
         worker->cur_work = work;
-        spin_unlock_irq(&worker->lock);
+        spin_unlock(&worker->lock);
 }

 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -493,6 +526,7 @@ static void io_worker_handle_work(struct io_worker *worker)

         do {
                 struct io_wq_work *work;
+                bool stalled;
 get_next:
                 /*
                  * If we got some work, mark us as busy. If we didn't, but
@@ -501,13 +535,14 @@ get_next:
                  * can't make progress, any work completion or insertion will
                  * clear the stalled flag.
                  */
-                work = io_get_next_work(wqe);
+                stalled = false;
+                work = io_get_next_work(wqe, worker, &stalled);
                 if (work)
                         __io_worker_busy(wqe, worker, work);
-                else if (!wq_list_empty(&wqe->work_list))
+                else if (stalled)
                         wqe->flags |= IO_WQE_FLAG_STALLED;

-                raw_spin_unlock_irq(&wqe->lock);
+                raw_spin_unlock(&wqe->lock);
                 if (!work)
                         break;
                 io_assign_current_work(worker, work);
@@ -539,16 +574,16 @@ get_next:
                                 clear_bit(hash, &wq->hash->map);
                                 if (wq_has_sleeper(&wq->hash->wait))
                                         wake_up(&wq->hash->wait);
-                                raw_spin_lock_irq(&wqe->lock);
+                                raw_spin_lock(&wqe->lock);
                                 wqe->flags &= ~IO_WQE_FLAG_STALLED;
                                 /* skip unnecessary unlock-lock wqe->lock */
                                 if (!work)
                                         goto get_next;
-                                raw_spin_unlock_irq(&wqe->lock);
+                                raw_spin_unlock(&wqe->lock);
                         }
                 } while (work);

-                raw_spin_lock_irq(&wqe->lock);
+                raw_spin_lock(&wqe->lock);
         } while (1);
 }

@@ -569,13 +604,13 @@ static int io_wqe_worker(void *data)

                 set_current_state(TASK_INTERRUPTIBLE);
 loop:
-                raw_spin_lock_irq(&wqe->lock);
+                raw_spin_lock(&wqe->lock);
                 if (io_wqe_run_queue(wqe)) {
                         io_worker_handle_work(worker);
                         goto loop;
                 }
                 __io_worker_idle(wqe, worker);
-                raw_spin_unlock_irq(&wqe->lock);
+                raw_spin_unlock(&wqe->lock);
                 if (io_flush_signals())
                         continue;
                 ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
@@ -594,7 +629,7 @@ loop:
         }

         if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-                raw_spin_lock_irq(&wqe->lock);
+                raw_spin_lock(&wqe->lock);
                 io_worker_handle_work(worker);
         }

@@ -636,9 +671,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk)

         worker->flags &= ~IO_WORKER_F_RUNNING;

-        raw_spin_lock_irq(&worker->wqe->lock);
+        raw_spin_lock(&worker->wqe->lock);
         io_wqe_dec_running(worker);
-        raw_spin_unlock_irq(&worker->wqe->lock);
+        raw_spin_unlock(&worker->wqe->lock);
 }

 static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
@@ -664,9 +699,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
                 kfree(worker);
 fail:
                 atomic_dec(&acct->nr_running);
-                raw_spin_lock_irq(&wqe->lock);
+                raw_spin_lock(&wqe->lock);
                 acct->nr_workers--;
-                raw_spin_unlock_irq(&wqe->lock);
+                raw_spin_unlock(&wqe->lock);
                 io_worker_ref_put(wq);
                 return;
         }
@@ -676,7 +711,7 @@ fail:
         set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
         tsk->flags |= PF_NO_SETAFFINITY;

-        raw_spin_lock_irq(&wqe->lock);
+        raw_spin_lock(&wqe->lock);
         hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
         list_add_tail_rcu(&worker->all_list, &wqe->all_list);
         worker->flags |= IO_WORKER_F_FREE;
@@ -684,7 +719,7 @@ fail:
                 worker->flags |= IO_WORKER_F_BOUND;
         if (first && (worker->flags & IO_WORKER_F_BOUND))
                 worker->flags |= IO_WORKER_F_FIXED;
-        raw_spin_unlock_irq(&wqe->lock);
+        raw_spin_unlock(&wqe->lock);
         wake_up_new_task(tsk);
 }

@@ -759,8 +794,7 @@ append:
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
         struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-        int work_flags;
-        unsigned long flags;
+        bool do_wake;

         /*
          * If io-wq is exiting for this task, or if the request has explicitly
@@ -772,14 +806,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
                 return;
         }

-        work_flags = work->flags;
-        raw_spin_lock_irqsave(&wqe->lock, flags);
+        raw_spin_lock(&wqe->lock);
         io_wqe_insert_work(wqe, work);
         wqe->flags &= ~IO_WQE_FLAG_STALLED;
-        raw_spin_unlock_irqrestore(&wqe->lock, flags);
+        do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
+                        !atomic_read(&acct->nr_running);
+        raw_spin_unlock(&wqe->lock);

-        if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
-            !atomic_read(&acct->nr_running))
+        if (do_wake)
                 io_wqe_wake_worker(wqe, acct);
 }

@@ -805,19 +839,18 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
         struct io_cb_cancel_data *match = data;
-        unsigned long flags;

         /*
          * Hold the lock to avoid ->cur_work going out of scope, caller
          * may dereference the passed in work.
          */
-        spin_lock_irqsave(&worker->lock, flags);
+        spin_lock(&worker->lock);
         if (worker->cur_work &&
             match->fn(worker->cur_work, match->data)) {
                 set_notify_signal(worker->task);
                 match->nr_running++;
         }
-        spin_unlock_irqrestore(&worker->lock, flags);
+        spin_unlock(&worker->lock);

         return match->nr_running && !match->cancel_all;
 }
@@ -845,16 +878,15 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
 {
         struct io_wq_work_node *node, *prev;
         struct io_wq_work *work;
-        unsigned long flags;

 retry:
-        raw_spin_lock_irqsave(&wqe->lock, flags);
+        raw_spin_lock(&wqe->lock);
         wq_list_for_each(node, prev, &wqe->work_list) {
                 work = container_of(node, struct io_wq_work, list);
                 if (!match->fn(work, match->data))
                         continue;
                 io_wqe_remove_pending(wqe, work, prev);
-                raw_spin_unlock_irqrestore(&wqe->lock, flags);
+                raw_spin_unlock(&wqe->lock);
                 io_run_cancel(work, wqe);
                 match->nr_pending++;
                 if (!match->cancel_all)
@@ -863,7 +895,7 @@ retry:
                 /* not safe to continue after unlock */
                 goto retry;
         }
-        raw_spin_unlock_irqrestore(&wqe->lock, flags);
+        raw_spin_unlock(&wqe->lock);
 }

 static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1004,12 +1036,12 @@ err_wq:

 static bool io_task_work_match(struct callback_head *cb, void *data)
 {
-        struct create_worker_data *cwd;
+        struct io_worker *worker;

         if (cb->func != create_worker_cb)
                 return false;
-        cwd = container_of(cb, struct create_worker_data, work);
-        return cwd->wqe->wq == data;
+        worker = container_of(cb, struct io_worker, create_work);
+        return worker->wqe->wq == data;
 }

 void io_wq_exit_start(struct io_wq *wq)
@@ -1026,12 +1058,13 @@ static void io_wq_exit_workers(struct io_wq *wq)
                 return;

         while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
-                struct create_worker_data *cwd;
+                struct io_worker *worker;

-                cwd = container_of(cb, struct create_worker_data, work);
-                atomic_dec(&cwd->wqe->acct[cwd->index].nr_running);
+                worker = container_of(cb, struct io_worker, create_work);
+                atomic_dec(&worker->wqe->acct[worker->create_index].nr_running);
                 io_worker_ref_put(wq);
-                kfree(cwd);
+                clear_bit_unlock(0, &worker->create_state);
+                io_worker_release(worker);
         }

         rcu_read_lock();
@@ -1143,6 +1176,35 @@ int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
         return 0;
 }

+/*
+ * Set max number of unbounded workers, returns old value. If new_count is 0,
+ * then just return the old value.
+ */
+int io_wq_max_workers(struct io_wq *wq, int *new_count)
+{
+        int i, node, prev = 0;
+
+        for (i = 0; i < 2; i++) {
+                if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
+                        new_count[i] = task_rlimit(current, RLIMIT_NPROC);
+        }
+
+        rcu_read_lock();
+        for_each_node(node) {
+                struct io_wqe_acct *acct;
+
+                for (i = 0; i < 2; i++) {
+                        acct = &wq->wqes[node]->acct[i];
+                        prev = max_t(int, acct->max_workers, prev);
+                        if (new_count[i])
+                                acct->max_workers = new_count[i];
+                        new_count[i] = prev;
+                }
+        }
+        rcu_read_unlock();
+        return 0;
+}
+
 static __init int io_wq_init(void)
 {
         int ret;
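The io_wq_max_workers() helper above is what backs the "limit async workers explicitly" item from the pull message; userspace reaches it through io_uring_register(2) with the IORING_REGISTER_IOWQ_MAX_WORKERS opcode added in the uapi hunk further down. A minimal sketch of that call, assuming a ring fd obtained from io_uring_setup(2), a 5.15+ kernel, and __NR_io_uring_register being available from <sys/syscall.h>; the function name and error handling are illustrative only:

/* Illustrative sketch, not part of the commit. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef IORING_REGISTER_IOWQ_MAX_WORKERS
#define IORING_REGISTER_IOWQ_MAX_WORKERS 19  /* value added in the uapi hunk below */
#endif

static int limit_iowq_workers(int ring_fd, unsigned int bounded, unsigned int unbounded)
{
        /* counts[0] = bounded pool, counts[1] = unbounded pool; 0 keeps the current limit */
        unsigned int counts[2] = { bounded, unbounded };

        if (syscall(__NR_io_uring_register, ring_fd,
                    IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2) < 0)
                return -1;
        /* matching "returns old value" above, the previous limits come back in the array */
        printf("previous limits: bounded=%u, unbounded=%u\n", counts[0], counts[1]);
        return 0;
}

Passing 0 for either entry leaves that pool's limit untouched, and limits are clamped to RLIMIT_NPROC as shown in the kernel helper.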
fs/io-wq.h

@@ -44,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
 static inline void wq_list_add_tail(struct io_wq_work_node *node,
                                     struct io_wq_work_list *list)
 {
+        node->next = NULL;
         if (!list->first) {
                 list->last = node;
                 WRITE_ONCE(list->first, node);
@@ -51,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
                 list->last->next = node;
                 list->last = node;
         }
-        node->next = NULL;
 }

 static inline void wq_list_cut(struct io_wq_work_list *list,
@@ -128,6 +128,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);

 int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+int io_wq_max_workers(struct io_wq *wq, int *new_count);

 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
fs/io_uring.c (1675 lines changed; file diff suppressed because it is too large)
include/linux/io_uring.h

@@ -7,17 +7,18 @@

 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_cancel(struct files_struct *files);
+void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);

-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
 {
         if (current->io_uring)
-                __io_uring_cancel(files);
+                __io_uring_cancel(false);
 }
 static inline void io_uring_task_cancel(void)
 {
-        return io_uring_files_cancel(NULL);
+        if (current->io_uring)
+                __io_uring_cancel(true);
 }
 static inline void io_uring_free(struct task_struct *tsk)
 {
@@ -32,7 +33,7 @@ static inline struct sock *io_uring_get_socket(struct file *file)
 static inline void io_uring_task_cancel(void)
 {
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
 {
 }
 static inline void io_uring_free(struct task_struct *tsk)
include/linux/socket.h

@@ -421,6 +421,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
                               struct sockaddr __user *upeer_sockaddr,
                               int __user *upeer_addrlen, int flags,
                               unsigned long nofile);
+extern struct file *do_accept(struct file *file, unsigned file_flags,
+                              struct sockaddr __user *upeer_sockaddr,
+                              int __user *upeer_addrlen, int flags);
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
                          int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
include/uapi/linux/io_uring.h

@@ -55,7 +55,10 @@ struct io_uring_sqe {
         } __attribute__((packed));
         /* personality to use, if used */
         __u16   personality;
-        __s32   splice_fd_in;
+        union {
+                __s32   splice_fd_in;
+                __u32   file_index;
+        };
         __u64   __pad2[2];
 };

@@ -148,7 +151,11 @@ enum {
  */
 #define IORING_TIMEOUT_ABS              (1U << 0)
 #define IORING_TIMEOUT_UPDATE           (1U << 1)
-
+#define IORING_TIMEOUT_BOOTTIME         (1U << 2)
+#define IORING_TIMEOUT_REALTIME         (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE      (1U << 4)
+#define IORING_TIMEOUT_CLOCK_MASK       (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK      (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
  * sqe->splice_flags
  * extends splice(2) flags
@@ -306,6 +313,9 @@ enum {
         IORING_REGISTER_IOWQ_AFF                = 17,
         IORING_UNREGISTER_IOWQ_AFF              = 18,

+        /* set/get max number of workers */
+        IORING_REGISTER_IOWQ_MAX_WORKERS        = 19,
+
         /* this goes last */
         IORING_REGISTER_LAST
 };
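The new IORING_TIMEOUT_BOOTTIME/REALTIME flags above let a timeout SQE pick its clock source instead of always using CLOCK_MONOTONIC. A minimal liburing sketch, assuming liburing built against 5.15+ uapi headers (io_uring_prep_timeout() is pre-existing liburing API, only the flag is new) and an illustrative helper name:

/* Illustrative sketch, not part of the commit. */
#include <liburing.h>

static int queue_boottime_timeout(struct io_uring *ring)
{
        struct __kernel_timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

        if (!sqe)
                return -1;
        /* count == 0: complete only when the timer expires (CQE res == -ETIME) */
        io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_BOOTTIME);
        return io_uring_submit(ring);
}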
kernel/exit.c

@@ -777,7 +777,7 @@ void __noreturn do_exit(long code)
                 schedule();
         }

-        io_uring_files_cancel(tsk->files);
+        io_uring_files_cancel();
         exit_signals(tsk);              /* sets PF_EXITING */

         /* sync mm's RSS info before statistics gathering */
net/socket.c (71 lines changed)

@@ -1722,32 +1722,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
         return __sys_listen(fd, backlog);
 }

-int __sys_accept4_file(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, unsigned file_flags,
                        struct sockaddr __user *upeer_sockaddr,
-                       int __user *upeer_addrlen, int flags,
-                       unsigned long nofile)
+                       int __user *upeer_addrlen, int flags)
 {
         struct socket *sock, *newsock;
         struct file *newfile;
-        int err, len, newfd;
+        int err, len;
         struct sockaddr_storage address;

-        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-                return -EINVAL;
-
-        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
-                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
-
         sock = sock_from_file(file);
-        if (!sock) {
-                err = -ENOTSOCK;
-                goto out;
-        }
+        if (!sock)
+                return ERR_PTR(-ENOTSOCK);

-        err = -ENFILE;
         newsock = sock_alloc();
         if (!newsock)
-                goto out;
+                return ERR_PTR(-ENFILE);

         newsock->type = sock->type;
         newsock->ops = sock->ops;
@@ -1758,18 +1748,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
          */
         __module_get(newsock->ops->owner);

-        newfd = __get_unused_fd_flags(flags, nofile);
-        if (unlikely(newfd < 0)) {
-                err = newfd;
-                sock_release(newsock);
-                goto out;
-        }
         newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
-        if (IS_ERR(newfile)) {
-                err = PTR_ERR(newfile);
-                put_unused_fd(newfd);
-                goto out;
-        }
+        if (IS_ERR(newfile))
+                return newfile;

         err = security_socket_accept(sock, newsock);
         if (err)
@@ -1794,16 +1775,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
         }

         /* File flags are not inherited via accept() unlike another OSes. */
-
-        fd_install(newfd, newfile);
-        err = newfd;
-
-out:
-        return err;
+        return newfile;
 out_fd:
         fput(newfile);
-        put_unused_fd(newfd);
-        goto out;
+        return ERR_PTR(err);
+}
+
+int __sys_accept4_file(struct file *file, unsigned file_flags,
+                       struct sockaddr __user *upeer_sockaddr,
+                       int __user *upeer_addrlen, int flags,
+                       unsigned long nofile)
+{
+        struct file *newfile;
+        int newfd;
+
+        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+                return -EINVAL;
+
+        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
+        newfd = __get_unused_fd_flags(flags, nofile);
+        if (unlikely(newfd < 0))
+                return newfd;
+
+        newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
+                            flags);
+        if (IS_ERR(newfile)) {
+                put_unused_fd(newfd);
+                return PTR_ERR(newfile);
+        }
+        fd_install(newfd, newfile);
+        return newfd;
 }

 /*
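do_accept() is split out above so io_uring can take the accepted struct file and install it straight into a ring's fixed (registered) file table instead of the process fd table, which is what "accept directly into fixed file table" in the pull message refers to. From userspace this is driven through the file_index field added to struct io_uring_sqe earlier in this diff. A minimal liburing sketch, assuming a fixed file table has already been registered (e.g. via io_uring_register_files()) with slot 0 free, a listening socket listen_fd, liburing headers new enough to expose sqe->file_index, and a 5.15+ kernel; the helper name is illustrative:

/* Illustrative sketch, not part of the commit. */
#include <liburing.h>

static int queue_accept_into_slot0(struct io_uring *ring, int listen_fd)
{
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

        if (!sqe)
                return -1;
        io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
        /* file_index holds the fixed slot + 1; 0 means "install a normal fd" */
        sqe->file_index = 1;    /* accepted socket goes into fixed slot 0 */
        return io_uring_submit(ring);
}

Later liburing releases wrap the same field as io_uring_prep_accept_direct(); the off-by-one encoding exists so that a zeroed sqe keeps the old "allocate a regular fd" behaviour.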