fix group stop with exit race

do_signal_stop() counts all sub-threads and sets ->group_stop_count
accordingly.  Every thread should decrement ->group_stop_count and stop;
the last one should notify the parent.

However a sub-thread can exit before it notices the signal_pending(), or it
may be somewhere in do_exit() already.  In that case the group stop never
finishes properly.

Note: this is a minimal fix; we can add some optimizations later.  For
example, we can return quickly if thread_group_empty().  Also, we can move
some signal-related code from exit_notify() to exit_signals().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Oleg Nesterov 2008-02-08 04:19:12 -08:00 committed by Linus Torvalds
parent 430c623121
commit d12619b5ff
3 changed files with 28 additions and 2 deletions

View File

@ -241,6 +241,7 @@ extern int show_unhandled_signals;
struct pt_regs; struct pt_regs;
extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
extern void exit_signals(struct task_struct *tsk);
extern struct kmem_cache *sighand_cachep; extern struct kmem_cache *sighand_cachep;

View File

@ -947,7 +947,7 @@ fastcall NORET_TYPE void do_exit(long code)
schedule(); schedule();
} }
tsk->flags |= PF_EXITING; exit_signals(tsk); /* sets PF_EXITING */
/* /*
* tsk->flags are checked in the futex code to protect against * tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes. * an exiting task cleaning up the robust pi futexes.

View File

@ -1739,7 +1739,7 @@ static int do_signal_stop(int signr)
* stop is always done with the siglock held, * stop is always done with the siglock held,
* so this check has no races. * so this check has no races.
*/ */
if (!t->exit_state && if (!(t->flags & PF_EXITING) &&
!task_is_stopped_or_traced(t)) { !task_is_stopped_or_traced(t)) {
stop_count++; stop_count++;
signal_wake_up(t, 0); signal_wake_up(t, 0);
@ -1900,6 +1900,31 @@ relock:
return signr; return signr;
} }
/*
 * exit_signals - mark @tsk as exiting and account for it in a pending
 * group stop.
 * @tsk: the exiting task.
 *
 * Called from do_exit() in place of the bare "tsk->flags |= PF_EXITING".
 * If a group stop is in progress (->group_stop_count != 0), this task
 * takes its share of the count here instead of stopping. If it was the
 * last thread the stop was waiting for, the group is marked stopped and
 * the parent is notified with CLD_STOPPED.
 */
void exit_signals(struct task_struct *tsk)
{
/* Set when this task's decrement completes the group stop. */
int group_stop = 0;
spin_lock_irq(&tsk->sighand->siglock);
/* Only decrement when a stop is actually pending; zero means we were last. */
if (unlikely(tsk->signal->group_stop_count) &&
!--tsk->signal->group_stop_count) {
tsk->signal->flags = SIGNAL_STOP_STOPPED;
group_stop = 1;
}
/*
* From now this task is not visible for group-wide signals,
* see wants_signal(), do_signal_stop().
*/
/* PF_EXITING is set while siglock is still held. */
tsk->flags |= PF_EXITING;
spin_unlock_irq(&tsk->sighand->siglock);
/* The parent is notified after siglock is dropped, under tasklist_lock. */
if (unlikely(group_stop)) {
read_lock(&tasklist_lock);
do_notify_parent_cldstop(tsk, CLD_STOPPED);
read_unlock(&tasklist_lock);
}
}
EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL(recalc_sigpending);
EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals); EXPORT_SYMBOL(flush_signals);