forked from OSchip/llvm-project
tsan: fix failures after multi-threaded fork
Creating threads after a multi-threaded fork is semi-supported,
we don't give particular guarantees, but we try to not fail
on simple cases and we have die_after_fork=0 flag that enables
not dying on creation of threads after a multi-threaded fork.
This flag is used in the wild:
23c052e3e3/SConstruct (L3599)
fork_multithreaded.cpp test started hanging in debug mode
after the recent "tsan: fix deadlock during race reporting" commit,
which added proactive ThreadRegistryLock check in SlotLock.
But the test broke earlier after "tsan: remove quadratic behavior in pthread_join"
commit which made tracking of alive threads based on pthread_t stricter
(CHECK-fail on 2 threads with the same pthread_t, or joining a non-existent thread).
When we start a thread after a multi-threaded fork, the new pthread_t
can actually match one of existing values (for threads that don't exist anymore).
Thread creation started CHECK-failing on this, but the test simply
ignored this CHECK failure in the child thread and "passed".
But after "tsan: fix deadlock during race reporting" the test started hanging dead,
because CHECK failures recursively lock thread registry.
Fix this purging all alive threads from thread registry on fork.
Also the thread registry mutex somehow lost the internal deadlock detector id
and was excluded from deadlock detection. If it would have the id, the CHECK
wouldn't hang because of the nested CHECK failure due to the deadlock.
But then again the test would have silently ignore this error as well
and the bugs wouldn't have been noticed.
Add the deadlock detector id to the thread registry mutex.
Also extend the test to check more cases and detect more bugs.
Reviewed By: melver
Differential Revision: https://reviews.llvm.org/D116091
This commit is contained in:
parent
d5b3cb0711
commit
d95baa98f3
|
@ -110,7 +110,7 @@ ThreadRegistry::ThreadRegistry(ThreadContextFactory factory, u32 max_threads,
|
|||
max_threads_(max_threads),
|
||||
thread_quarantine_size_(thread_quarantine_size),
|
||||
max_reuse_(max_reuse),
|
||||
mtx_(),
|
||||
mtx_(MutexThreadRegistry),
|
||||
total_threads_(0),
|
||||
alive_threads_(0),
|
||||
max_alive_threads_(0),
|
||||
|
@ -365,4 +365,20 @@ void ThreadRegistry::SetThreadUserId(u32 tid, uptr user_id) {
|
|||
CHECK(live_.try_emplace(user_id, tctx->tid).second);
|
||||
}
|
||||
|
||||
u32 ThreadRegistry::OnFork(u32 tid) {
|
||||
ThreadRegistryLock l(this);
|
||||
// We only purge user_id (pthread_t) of live threads because
|
||||
// they cause CHECK failures if new threads with matching pthread_t
|
||||
// created after fork.
|
||||
// Potentially we could purge more info (ThreadContextBase themselves),
|
||||
// but it's hard to test and easy to introduce new issues by doing this.
|
||||
for (auto *tctx : threads_) {
|
||||
if (tctx->tid == tid || !tctx->user_id)
|
||||
continue;
|
||||
CHECK(live_.erase(tctx->user_id));
|
||||
tctx->user_id = 0;
|
||||
}
|
||||
return alive_threads_;
|
||||
}
|
||||
|
||||
} // namespace __sanitizer
|
||||
|
|
|
@ -133,6 +133,11 @@ class MUTEX ThreadRegistry {
|
|||
u32 ConsumeThreadUserId(uptr user_id);
|
||||
void SetThreadUserId(u32 tid, uptr user_id);
|
||||
|
||||
// OnFork must be called in the child process after fork to purge old
|
||||
// threads that don't exist anymore (except for the current thread tid).
|
||||
// Returns number of alive threads before fork.
|
||||
u32 OnFork(u32 tid);
|
||||
|
||||
private:
|
||||
const ThreadContextFactory context_factory_;
|
||||
const u32 max_threads_;
|
||||
|
|
|
@ -801,7 +801,7 @@ void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); }
|
|||
|
||||
void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
|
||||
ForkAfter(thr);
|
||||
u32 nthread = ThreadCount(thr);
|
||||
u32 nthread = ctx->thread_registry.OnFork(thr->tid);
|
||||
VPrintf(1,
|
||||
"ThreadSanitizer: forked new process with pid %d,"
|
||||
" parent had %d threads\n",
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-DIE
|
||||
// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 66 2>&1 | FileCheck %s -check-prefix=CHECK-DIE
|
||||
// RUN: %clangxx_tsan -O1 %s -o %t && %env_tsan_opts=die_after_fork=0 %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-NODIE
|
||||
#include "test.h"
|
||||
#include <errno.h>
|
||||
|
@ -6,7 +6,7 @@
|
|||
#include <sys/wait.h>
|
||||
|
||||
static void *sleeper(void *p) {
|
||||
sleep(1000); // not intended to exit during test
|
||||
barrier_wait(&barrier);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -14,31 +14,53 @@ static void *nop(void *p) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int main() {
|
||||
barrier_init(&barrier, 2);
|
||||
pthread_t th;
|
||||
pthread_create(&th, 0, sleeper, 0);
|
||||
switch (fork()) {
|
||||
default: // parent
|
||||
while (wait(0) < 0) {}
|
||||
break;
|
||||
case 0: // child
|
||||
{
|
||||
pthread_t th2;
|
||||
pthread_create(&th2, 0, nop, 0);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
case -1: // error
|
||||
int main(int argc, const char **argv) {
|
||||
barrier_init(&barrier, 3);
|
||||
const int kSleeperThreads = 2;
|
||||
barrier_init(&barrier, kSleeperThreads + 1);
|
||||
pthread_t th0[kSleeperThreads];
|
||||
for (int i = 0; i < kSleeperThreads; i++)
|
||||
pthread_create(&th0[i], 0, sleeper, 0);
|
||||
const int kNopThreads = 5;
|
||||
pthread_t th1[kNopThreads];
|
||||
for (int i = 0; i < kNopThreads; i++)
|
||||
pthread_create(&th1[i], 0, nop, 0);
|
||||
for (int i = 0; i < kNopThreads; i++)
|
||||
pthread_join(th1[i], 0);
|
||||
int pid = fork();
|
||||
if (pid < 0) {
|
||||
fprintf(stderr, "failed to fork (%d)\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
if (pid == 0) {
|
||||
// child
|
||||
const int kChildThreads = 4;
|
||||
pthread_t th2[kChildThreads];
|
||||
for (int i = 0; i < kChildThreads; i++)
|
||||
pthread_create(&th2[i], 0, nop, 0);
|
||||
for (int i = 0; i < kChildThreads; i++)
|
||||
pthread_join(th2[i], 0);
|
||||
exit(0);
|
||||
return 0;
|
||||
}
|
||||
// parent
|
||||
int expect = argc > 1 ? atoi(argv[1]) : 0;
|
||||
int status = 0;
|
||||
while (waitpid(pid, &status, 0) != pid) {
|
||||
}
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != expect) {
|
||||
fprintf(stderr, "subprocess exited with %d, expected %d\n", status, expect);
|
||||
exit(1);
|
||||
}
|
||||
barrier_wait(&barrier);
|
||||
for (int i = 0; i < kSleeperThreads; i++)
|
||||
pthread_join(th0[i], 0);
|
||||
fprintf(stderr, "OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK-DIE: ThreadSanitizer: starting new threads after multi-threaded fork is not supported
|
||||
// CHECK-DIE: OK
|
||||
|
||||
// CHECK-NODIE-NOT: ThreadSanitizer: starting new threads after multi-threaded fork is not supported
|
||||
// CHECK-NODIE-NOT: ThreadSanitizer:
|
||||
// CHECK-NODIE: OK
|
||||
|
||||
// CHECK-NODIE-NOT: ThreadSanitizer:
|
||||
|
|
Loading…
Reference in New Issue