io-wq: remove worker to owner tw dependency
INFO: task iou-wrk-6609:6612 blocked for more than 143 seconds. Not tainted 5.15.0-rc5-syzkaller #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:iou-wrk-6609 state:D stack:27944 pid: 6612 ppid: 6526 flags:0x00004006 Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xb44/0x5960 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 schedule_timeout+0x1db/0x2a0 kernel/time/timer.c:1857 do_wait_for_common kernel/sched/completion.c:85 [inline] __wait_for_common kernel/sched/completion.c:106 [inline] wait_for_common kernel/sched/completion.c:117 [inline] wait_for_completion+0x176/0x280 kernel/sched/completion.c:138 io_worker_exit fs/io-wq.c:183 [inline] io_wqe_worker+0x66d/0xc40 fs/io-wq.c:597 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 io-wq worker may submit a task_work to the master task and upon io_worker_exit() wait for the tw to get executed. The problem appears when the master task is waiting in coredump.c: 468 freezer_do_not_count(); 469 wait_for_completion(&core_state->startup); 470 freezer_count(); Apparently having some dependency on children threads getting everything stuck. Workaround it by cancelling the taks_work callback that causes it before going into io_worker_exit() waiting. p.s. probably a better option is to not submit tw elevating the refcount in the first place, but let's leave this excercise for the future. Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+27d62ee6f256b186883e@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/142a716f4ed936feae868959059154362bfa8c19.1635509451.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
f75d118349
commit
1d5f5ea7cb
46
fs/io-wq.c
46
fs/io-wq.c
|
@ -140,6 +140,7 @@ static void io_wqe_dec_running(struct io_worker *worker);
|
|||
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_wqe_acct *acct,
|
||||
struct io_cb_cancel_data *match);
|
||||
static void create_worker_cb(struct callback_head *cb);
|
||||
|
||||
static bool io_worker_get(struct io_worker *worker)
|
||||
{
|
||||
|
@ -174,9 +175,44 @@ static void io_worker_ref_put(struct io_wq *wq)
|
|||
complete(&wq->worker_done);
|
||||
}
|
||||
|
||||
static void io_worker_cancel_cb(struct io_worker *worker)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&worker->wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&worker->wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
}
|
||||
|
||||
static bool io_task_worker_match(struct callback_head *cb, void *data)
|
||||
{
|
||||
struct io_worker *worker;
|
||||
|
||||
if (cb->func != create_worker_cb)
|
||||
return false;
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
return worker == data;
|
||||
}
|
||||
|
||||
static void io_worker_exit(struct io_worker *worker)
|
||||
{
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
while (1) {
|
||||
struct callback_head *cb = task_work_cancel_match(wq->task,
|
||||
io_task_worker_match, worker);
|
||||
|
||||
if (!cb)
|
||||
break;
|
||||
io_worker_cancel_cb(worker);
|
||||
}
|
||||
|
||||
io_worker_release(worker);
|
||||
wait_for_completion(&worker->ref_done);
|
||||
|
@ -1148,17 +1184,9 @@ static void io_wq_exit_workers(struct io_wq *wq)
|
|||
|
||||
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
|
||||
struct io_worker *worker;
|
||||
struct io_wqe_acct *acct;
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
acct = io_wqe_get_acct(worker);
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&worker->wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&worker->wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
io_worker_cancel_cb(worker);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
|
|
Loading…
Reference in New Issue