tsan: fix signal handling during stop-the-world

Long story short: stop-the-world briefly resets SIGSEGV handler to SIG_DFL.
This breaks programs that handle and continue after SIGSEGV (namely JVM).
See the test and comments for details.

http://reviews.llvm.org/D7722

llvm-svn: 229678
This commit is contained in:
Dmitry Vyukov 2015-02-18 15:13:29 +00:00
parent eaa4c73d62
commit 8870ee7755
2 changed files with 85 additions and 50 deletions

View File

@ -170,10 +170,9 @@ bool ThreadSuspender::SuspendAllThreads() {
// Pointer to the ThreadSuspender instance for use in signal handler.
static ThreadSuspender *thread_suspender_instance = NULL;
// Signals that should not be blocked (this is used in the parent thread as well
// as the tracer thread).
static const int kUnblockedSignals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV,
SIGBUS, SIGXCPU, SIGXFSZ };
// Synchronous signals that should not be blocked.
static const int kSyncSignals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
SIGXCPU, SIGXFSZ };
// Structure for passing arguments into the tracer thread.
struct TracerThreadArgument {
@ -188,7 +187,7 @@ struct TracerThreadArgument {
static DieCallbackType old_die_callback;
// Signal handler to wake up suspended threads when the tracer thread dies.
void TracerThreadSignalHandler(int signum, void *siginfo, void *) {
static void TracerThreadSignalHandler(int signum, void *siginfo, void *) {
if (thread_suspender_instance != NULL) {
if (signum == SIGABRT)
thread_suspender_instance->KillAllThreads();
@ -228,6 +227,7 @@ static int TracerThread(void* argument) {
tracer_thread_argument->mutex.Lock();
tracer_thread_argument->mutex.Unlock();
old_die_callback = GetDieCallback();
SetDieCallback(TracerThreadDieCallback);
ThreadSuspender thread_suspender(internal_getppid());
@ -242,17 +242,14 @@ static int TracerThread(void* argument) {
handler_stack.ss_size = kHandlerStackSize;
internal_sigaltstack(&handler_stack, NULL);
// Install our handler for fatal signals. Other signals should be blocked by
// the mask we inherited from the caller thread.
for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
signal_index++) {
__sanitizer_sigaction new_sigaction;
internal_memset(&new_sigaction, 0, sizeof(new_sigaction));
new_sigaction.sigaction = TracerThreadSignalHandler;
new_sigaction.sa_flags = SA_ONSTACK | SA_SIGINFO;
internal_sigfillset(&new_sigaction.sa_mask);
internal_sigaction_norestorer(kUnblockedSignals[signal_index],
&new_sigaction, NULL);
// Install our handler for synchronous signals. Other signals should be
// blocked by the mask we inherited from the parent thread.
for (uptr i = 0; i < ARRAY_SIZE(kSyncSignals); i++) {
__sanitizer_sigaction act;
internal_memset(&act, 0, sizeof(act));
act.sigaction = TracerThreadSignalHandler;
act.sa_flags = SA_ONSTACK | SA_SIGINFO;
internal_sigaction_norestorer(kSyncSignals[i], &act, 0);
}
int exit_code = 0;
@ -265,9 +262,11 @@ static int TracerThread(void* argument) {
thread_suspender.ResumeAllThreads();
exit_code = 0;
}
// Note, this is a bad race. If TracerThreadDieCallback is already started
// in another thread and observed that thread_suspender_instance != 0,
// it can call KillAllThreads on the destroyed variable.
SetDieCallback(old_die_callback);
thread_suspender_instance = NULL;
handler_stack.ss_flags = SS_DISABLE;
internal_sigaltstack(&handler_stack, NULL);
return exit_code;
}
@ -299,53 +298,21 @@ class ScopedStackSpaceWithGuard {
// into globals.
static __sanitizer_sigset_t blocked_sigset;
static __sanitizer_sigset_t old_sigset;
static __sanitizer_sigaction old_sigactions
[ARRAY_SIZE(kUnblockedSignals)];
class StopTheWorldScope {
public:
StopTheWorldScope() {
// Block all signals that can be blocked safely, and install
// default handlers for the remaining signals.
// We cannot allow user-defined handlers to run while the ThreadSuspender
// thread is active, because they could conceivably call some libc functions
// which modify errno (which is shared between the two threads).
internal_sigfillset(&blocked_sigset);
for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
signal_index++) {
// Remove the signal from the set of blocked signals.
internal_sigdelset(&blocked_sigset, kUnblockedSignals[signal_index]);
// Install the default handler.
__sanitizer_sigaction new_sigaction;
internal_memset(&new_sigaction, 0, sizeof(new_sigaction));
new_sigaction.handler = SIG_DFL;
internal_sigfillset(&new_sigaction.sa_mask);
internal_sigaction_norestorer(kUnblockedSignals[signal_index],
&new_sigaction, &old_sigactions[signal_index]);
}
int sigprocmask_status =
internal_sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
CHECK_EQ(sigprocmask_status, 0); // sigprocmask should never fail
// Make this process dumpable. Processes that are not dumpable cannot be
// attached to.
process_was_dumpable_ = internal_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
if (!process_was_dumpable_)
internal_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
old_die_callback = GetDieCallback();
}
~StopTheWorldScope() {
SetDieCallback(old_die_callback);
// Restore the dumpable flag.
if (!process_was_dumpable_)
internal_prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
// Restore the signal handlers.
for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
signal_index++) {
internal_sigaction_norestorer(kUnblockedSignals[signal_index],
&old_sigactions[signal_index], NULL);
}
internal_sigprocmask(SIG_SETMASK, &old_sigset, &old_sigset);
}
private:
@ -378,11 +345,36 @@ void StopTheWorld(StopTheWorldCallback callback, void *argument) {
// Block the execution of TracerThread until after we have set ptrace
// permissions.
tracer_thread_argument.mutex.Lock();
// Signal handling story.
// We don't want async signals to be delivered to the tracer thread,
// so we block all async signals before creating the thread. An async signal
// handler can temporary modify errno, which is shared with this thread.
// We ought to use pthread_sigmask here, because sigprocmask has undefined
// behavior in multithreaded programs. However, on linux sigprocmask is
// equivalent to pthread_sigmask with the exception that pthread_sigmask
// does not allow to block some signals used internally in pthread
// implementation. We are fine with blocking them here, we are really not
// going to pthread_cancel the thread.
// The tracer thread should not raise any synchronous signals. But in case it
// does, we setup a special handler for sync signals that properly kills the
// parent as well. Note: we don't pass CLONE_SIGHAND to clone, so handlers
// in the tracer thread won't interfere with user program. Double note: if a
// user does something along the lines of 'kill -11 pid', that can kill the
// process even if user setup own handler for SEGV.
// Thing to watch out for: this code should not change behavior of user code
// in any observable way. In particular it should not override user signal
// handlers.
internal_sigfillset(&blocked_sigset);
for (uptr i = 0; i < ARRAY_SIZE(kSyncSignals); i++)
internal_sigdelset(&blocked_sigset, kSyncSignals[i]);
int rv = internal_sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
CHECK_EQ(rv, 0);
uptr tracer_pid = internal_clone(
TracerThread, tracer_stack.Bottom(),
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
&tracer_thread_argument, 0 /* parent_tidptr */, 0 /* newtls */, 0
/* child_tidptr */);
internal_sigprocmask(SIG_SETMASK, &old_sigset, 0);
int local_errno = 0;
if (internal_iserror(tracer_pid, &local_errno)) {
VReport(1, "Failed spawning a tracer thread (errno %d).\n", local_errno);

View File

@ -0,0 +1,43 @@
// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="flush_memory_ms=1 memory_limit_mb=1" %run %t 2>&1 | FileCheck %s
// JVM uses SEGV to preempt threads. All threads do a load from a known address
// periodically. When runtime needs to preempt threads, it unmaps the page.
// Threads start triggering SEGV one by one. The signal handler blocks
// threads while runtime does its thing. Then runtime maps the page again
// and resumes the threads.
// Previously this pattern conflicted with stop-the-world machinery,
// because it briefly reset SEGV handler to SIG_DFL.
// As the consequence JVM just silently died.
// This test sets memory flushing rate to maximum, then does series of
// "benign" SEGVs that are handled by signal handler, and ensures that
// the process survive.
#include "test.h"
#include <signal.h>
#include <sys/mman.h>
void *guard;
void handler(int signo, siginfo_t *info, void *uctx) {
mprotect(guard, 4096, PROT_READ | PROT_WRITE);
}
int main() {
// Disabled for now, because there is another issue in ptrace.
// It will be addressed in a subsequent change.
goto done;
struct sigaction a;
a.sa_sigaction = handler;
a.sa_flags = SA_SIGINFO;
sigaction(SIGSEGV, &a, 0);
guard = mmap(0, 4096, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
for (int i = 0; i < 1000000; i++) {
mprotect(guard, 4096, PROT_NONE);
*(int*)guard = 1;
}
done:
fprintf(stderr, "DONE\n");
}
// CHECK: DONE