From cbe9dac379047730e39c7e570eddd27124b0d2dc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 21 Jun 2022 12:39:51 -0500 Subject: [PATCH 1/3] signal: Ensure SIGNAL_GROUP_EXIT gets set in do_group_exit The function do_group_exit has an optimization that avoids taking siglock and doing the work to find other threads in the signal group and shutting them down. It is very desirable for SIGNAL_GROUP_EXIT to always been set whenever it is decided for the process to exit. That ensures only a single place needs to be tested, and a single bit of state needs to be looked at. This makes the optimization in do_group_exit counter productive. Make the code and maintenance simpler by removing this unnecessary option. Link: https://lkml.kernel.org/r/87letod4v3.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index 64c938ce36fe..a3929e5e6d61 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -905,7 +905,7 @@ do_group_exit(int exit_code) exit_code = sig->group_exit_code; else if (sig->group_exec_task) exit_code = 0; - else if (!thread_group_empty(current)) { + else { struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); From d80f7d7b2c75c5954d335dffbccca62a5002c3e0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 21 Jun 2022 14:38:52 -0500 Subject: [PATCH 2/3] signal: Guarantee that SIGNAL_GROUP_EXIT is set on process exit Track how many threads have not started exiting and when the last thread starts exiting set SIGNAL_GROUP_EXIT. This guarantees that SIGNAL_GROUP_EXIT will get set when a process exits. In practice this achieves nothing as glibc's implementation of _exit calls sys_group_exit then sys_exit. While glibc's implemenation of pthread_exit calls exit (which cleansup and calls _exit) if it is the last thread and sys_exit if it is the last thread. This means the only way the kernel might observe a process that does not set call exit_group is if the language runtime does not use glibc. With more cleanups I hope to move the decrement of quick_threads earlier. Link: https://lkml.kernel.org/r/87bkukd4tc.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 1 + kernel/exit.c | 18 ++++++++++++++++++ kernel/fork.c | 2 ++ 3 files changed, 21 insertions(+) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cafbe03eed01..20099268fa25 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -94,6 +94,7 @@ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; + int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ diff --git a/kernel/exit.c b/kernel/exit.c index a3929e5e6d61..d8ecbaa514f7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -733,11 +733,29 @@ static void check_stack_usage(void) static inline void check_stack_usage(void) {} #endif +static void synchronize_group_exit(struct task_struct *tsk, long code) +{ + struct sighand_struct *sighand = tsk->sighand; + struct signal_struct *signal = tsk->signal; + + spin_lock_irq(&sighand->siglock); + signal->quick_threads--; + if ((signal->quick_threads == 0) && + !(signal->flags & SIGNAL_GROUP_EXIT)) { + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = code; + signal->group_stop_count = 0; + } + spin_unlock_irq(&sighand->siglock); +} + void __noreturn do_exit(long code) { struct task_struct *tsk = current; int group_dead; + synchronize_group_exit(tsk, code); + WARN_ON(tsk->plug); kcov_task_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 9d44f2d46c69..67813b25a567 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1692,6 +1692,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; sig->nr_threads = 1; + sig->quick_threads = 1; atomic_set(&sig->live, 1); refcount_set(&sig->sigcnt, 1); @@ -2444,6 +2445,7 @@ static __latent_entropy struct task_struct *copy_process( __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; + current->signal->quick_threads++; atomic_inc(¤t->signal->live); refcount_inc(¤t->signal->sigcnt); task_join_group_stop(p); From 9a95f78eab70deeb5a4c879c19b841a6af5b66e7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Jan 2022 10:37:00 -0600 Subject: [PATCH 3/3] signal: Drop signals received after a fatal signal has been processed In 403bad72b67d ("coredump: only SIGKILL should interrupt the coredumping task") Oleg modified the kernel to drop all signals that come in during a coredump except SIGKILL, and suggested that it might be a good idea to generalize that to other cases after the process has received a fatal signal. Semantically it does not make sense to perform any signal delivery after the process has already been killed. When a signal is sent while a process is dying today the signal is placed in the signal queue by __send_signal and a single task of the process is woken up with signal_wake_up, if there are any tasks that have not set PF_EXITING. Take things one step farther and have prepare_signal report that all signals that come after a process has been killed should be ignored. While retaining the historical exception of allowing SIGKILL to interrupt coredumps. Update the comment in fs/coredump.c to make it clear coredumps are special in being able to receive SIGKILL. This changes things so that a process stopped in PTRACE_EVENT_EXIT can not be made to escape it's ptracer and finish exiting by sending it SIGKILL. That a process can be made to leave PTRACE_EVENT_EXIT and escape it's tracer by sending the process a SIGKILL has been complicating tracer's for no apparent advantage. If the process needs to be made to leave PTRACE_EVENT_EXIT all that needs to happen is to kill the proceses's tracer. This differs from the coredump code where there is no other mechanism besides honoring SIGKILL to expedite the end of coredumping. Link: https://lkml.kernel.org/r/875yksd4s9.fsf_-_@email.froward.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/coredump.c | 2 +- kernel/signal.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index ebc43f960b64..b836948c9543 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -354,7 +354,7 @@ static int zap_process(struct task_struct *start, int exit_code) struct task_struct *t; int nr = 0; - /* ignore all signals except SIGKILL, see prepare_signal() */ + /* Allow SIGKILL, see prepare_signal() */ start->signal->flags = SIGNAL_GROUP_EXIT; start->signal->group_exit_code = exit_code; start->signal->group_stop_count = 0; diff --git a/kernel/signal.c b/kernel/signal.c index 6f86fda5e432..8a0f114d00e0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -913,8 +913,9 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) if (signal->core_state) return sig == SIGKILL; /* - * The process is in the middle of dying, nothing to do. + * The process is in the middle of dying, drop the signal. */ + return false; } else if (sig_kernel_stop(sig)) { /* * This is a stop signal. Remove SIGCONT from all queues.