tsan: fix deadlock during race reporting

SlotPairLocker calls SlotLock under ctx->multi_slot_mtx.
SlotLock can invoke global reset DoReset if we are out of slots/epochs.
But DoReset locks ctx->multi_slot_mtx as well, which leads to deadlock.

Resolve the deadlock by removing SlotPairLocker/multi_slot_mtx
and only lock one slot for which we will do RestoreStack.
We need to lock that slot because RestoreStack accesses the slot journal.
But it's unclear why we need to lock the current slot.
Initially I did it just to be on the safer side (but at that time
we dit not lock the second slot, so it was easy just to lock the current slot).

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D116040
This commit is contained in:
Dmitry Vyukov 2021-12-20 14:03:13 +01:00
parent ac719d7c9a
commit 2eb3e20461
7 changed files with 41 additions and 55 deletions

View File

@ -243,7 +243,6 @@ enum {
MutexTypeTrace,
MutexTypeSlot,
MutexTypeSlots,
MutexTypeMultiSlot,
};
} // namespace __tsan

View File

@ -210,7 +210,6 @@ static void DoResetImpl(uptr epoch) {
// error: expecting mutex 'slot.mtx' to be held at start of each loop
void DoReset(ThreadState* thr, uptr epoch) NO_THREAD_SAFETY_ANALYSIS {
{
Lock l(&ctx->multi_slot_mtx);
for (auto& slot : ctx->slots) {
slot.mtx.Lock();
if (UNLIKELY(epoch == 0))
@ -337,6 +336,13 @@ void SlotDetach(ThreadState* thr) {
void SlotLock(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
DCHECK(!thr->slot_locked);
#if SANITIZER_DEBUG
// Check these mutexes are not locked.
// We can call DoReset from SlotAttachAndLock, which will lock
// these mutexes, but it happens only every once in a while.
{ ThreadRegistryLock lock(&ctx->thread_registry); }
{ Lock lock(&ctx->slot_mtx); }
#endif
TidSlot* slot = thr->slot;
slot->mtx.Lock();
thr->slot_locked = true;
@ -367,7 +373,6 @@ Context::Context()
fired_suppressions_mtx(MutexTypeFired),
clock_alloc(LINKER_INITIALIZED, "clock allocator"),
slot_mtx(MutexTypeSlots),
multi_slot_mtx(MutexTypeMultiSlot),
resetting() {
fired_suppressions.reserve(8);
for (uptr i = 0; i < ARRAY_SIZE(slots); i++) {
@ -767,7 +772,6 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
// Detaching from the slot makes OnUserFree skip writing to the shadow.
// The slot will be locked so any attempts to use it will deadlock anyway.
SlotDetach(thr);
ctx->multi_slot_mtx.Lock();
for (auto& slot : ctx->slots) slot.mtx.Lock();
ctx->thread_registry.Lock();
ctx->slot_mtx.Lock();
@ -799,7 +803,6 @@ static void ForkAfter(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
ctx->slot_mtx.Unlock();
ctx->thread_registry.Unlock();
for (auto& slot : ctx->slots) slot.mtx.Unlock();
ctx->multi_slot_mtx.Unlock();
SlotAttachAndLock(thr);
SlotUnlock(thr);
GlobalProcessorUnlock();
@ -1053,9 +1056,7 @@ MutexMeta mutex_meta[] = {
{MutexTypeAtExit, "AtExit", {}},
{MutexTypeFired, "Fired", {MutexLeaf}},
{MutexTypeRacy, "Racy", {MutexLeaf}},
{MutexTypeGlobalProc,
"GlobalProc",
{MutexTypeSlot, MutexTypeSlots, MutexTypeMultiSlot}},
{MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}},
{MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
{MutexTypeTrace, "Trace", {}},
{MutexTypeSlot,
@ -1063,7 +1064,6 @@ MutexMeta mutex_meta[] = {
{MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry,
MutexTypeSlots}},
{MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}},
{MutexTypeMultiSlot, "MultiSlot", {MutexTypeSlot, MutexTypeSlots}},
{},
};

View File

@ -333,8 +333,6 @@ struct Context {
// Protects global_epoch, slot_queue, trace_part_recycle.
Mutex slot_mtx;
// Prevents lock order inversions when we lock more than 1 slot.
Mutex multi_slot_mtx;
uptr global_epoch; // guarded by slot_mtx and by all slot mutexes
bool resetting; // global reset is in progress
IList<TidSlot, &TidSlot::node> slot_queue GUARDED_BY(slot_mtx);
@ -609,16 +607,6 @@ enum FiberSwitchFlags {
FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
};
class SlotPairLocker {
public:
SlotPairLocker(ThreadState *thr, Sid sid);
~SlotPairLocker();
private:
ThreadState *thr_;
TidSlot *slot_;
};
class SlotLocker {
public:
ALWAYS_INLINE

View File

@ -552,7 +552,9 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
FastState last_lock, StackID creation_stack_id) {
SlotPairLocker locker(thr, last_lock.sid());
// We need to lock the slot during RestoreStack because it protects
// the slot journal.
Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx);
ThreadRegistryLock l0(&ctx->thread_registry);
Lock slots_lock(&ctx->slot_mtx);
ScopedReport rep(ReportTypeMutexDestroyLocked);

View File

@ -729,27 +729,6 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
return false;
}
// We need to lock the target slot during RestoreStack because it protects
// the slot journal. However, the target slot can be the slot of the current
// thread or a different slot.
SlotPairLocker::SlotPairLocker(ThreadState *thr,
Sid sid) NO_THREAD_SAFETY_ANALYSIS : thr_(thr),
slot_() {
CHECK_NE(sid, kFreeSid);
Lock l(&ctx->multi_slot_mtx);
SlotLock(thr);
if (sid == thr->slot->sid)
return;
slot_ = &ctx->slots[static_cast<uptr>(sid)];
slot_->mtx.Lock();
}
SlotPairLocker::~SlotPairLocker() NO_THREAD_SAFETY_ANALYSIS {
SlotUnlock(thr_);
if (slot_)
slot_->mtx.Unlock();
}
void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
AccessType typ0) {
CheckedMutex::CheckNoLocks();
@ -806,7 +785,9 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
DynamicMutexSet mset1;
MutexSet *mset[kMop] = {&thr->mset, mset1};
SlotPairLocker locker(thr, s[1].sid());
// We need to lock the slot during RestoreStack because it protects
// the slot journal.
Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
ThreadRegistryLock l0(&ctx->thread_registry);
Lock slots_lock(&ctx->slot_mtx);
if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,

View File

@ -88,7 +88,7 @@ TRACE_TEST(Trace, RestoreAccess) {
// The previous one is equivalent, but RestoreStack must prefer
// the last of the matchig accesses.
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
SlotPairLocker locker(thr, thr->fast_state.sid());
Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
@ -148,7 +148,7 @@ TRACE_TEST(Trace, MemoryAccessSize) {
kAccessRead);
break;
}
SlotPairLocker locker(thr, thr->fast_state.sid());
Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
@ -176,7 +176,7 @@ TRACE_TEST(Trace, RestoreMutexLock) {
TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5001, 0x6002);
SlotPairLocker locker(thr, thr->fast_state.sid());
Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
@ -219,7 +219,7 @@ TRACE_TEST(Trace, MultiPart) {
FuncEntry(thr, 0x4000);
TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
SlotPairLocker locker(thr, thr->fast_state.sid());
Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;

View File

@ -1,10 +1,13 @@
// RUN: %clangxx_tsan -O1 %s -o %t && %env_tsan_opts=flush_memory_ms=1:flush_symbolizer_ms=1:memory_limit_mb=1 %run %t 2>&1 | FileCheck %s
// This run stresses global reset happenning concurrently with everything else.
// RUN: %clangxx_tsan -O1 %s -o %t && %env_tsan_opts=flush_memory_ms=1:flush_symbolizer_ms=1:memory_limit_mb=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NORACE
// This run stresses race reporting happenning concurrently with everything else.
// RUN: %clangxx_tsan -O1 %s -DRACE=1 -o %t && %env_tsan_opts=suppress_equal_stacks=0:suppress_equal_addresses=0 %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE
#include "test.h"
#include <fcntl.h>
#include <string.h>
volatile long stop;
long atomic, read_only;
long atomic, read_only, racy;
int fds[2];
__attribute__((noinline)) void *SecondaryThread(void *x) {
@ -22,7 +25,7 @@ void *Thread(void *x) {
// just read the stop variable
}
if (me == 0 || me == 2) {
__atomic_store_n(&atomic, 1, __ATOMIC_RELEASE);
__atomic_store_n(&atomic, sink, __ATOMIC_RELEASE);
}
if (me == 0 || me == 3) {
sink += __atomic_fetch_add(&atomic, 1, __ATOMIC_ACQ_REL);
@ -47,6 +50,13 @@ void *Thread(void *x) {
memcpy(&buf, &read_only, sizeof(buf));
sink += buf;
}
if (me == 0 || me == 9) {
#if RACE
sink += racy++;
#else
sink += racy;
#endif
}
// If you add more actions, update kActions in main.
}
return NULL;
@ -60,8 +70,12 @@ int main() {
exit((perror("fcntl"), 1));
if (fcntl(fds[1], F_SETFL, O_NONBLOCK))
exit((perror("fcntl"), 1));
const int kActions = 9;
const int kActions = 10;
#if RACE
const int kMultiplier = 1;
#else
const int kMultiplier = 4;
#endif
pthread_t t[kActions * kMultiplier];
for (int i = 0; i < kActions * kMultiplier; i++)
pthread_create(&t[i], NULL, Thread, (void *)(long)(i % kActions));
@ -73,6 +87,8 @@ int main() {
return 0;
}
// CHECK-NOT: ThreadSanitizer:
// CHECK: DONE
// CHECK-NOT: ThreadSanitizer:
// CHECK-NORACE-NOT: ThreadSanitizer:
// CHECK-NORACE: DONE
// CHECK-NORACE-NOT: ThreadSanitizer:
// CHECK-RACE: ThreadSanitizer: data race
// CHECK-RACE: DONE