tsan: speed up pthread_setname_np

pthread_setname_np does a linear search over all thread descriptors
to map a pthread_t to its thread descriptor. This has O(N^2) complexity
and becomes much worse in the new tsan runtime, which keeps every thread
that has ever existed in the thread registry.
Replace the linear search with direct access when pthread_setname_np
is called for the current thread (a very common case).

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D113916
This commit is contained in:
Dmitry Vyukov 2021-11-15 19:00:31 +01:00
parent b85f97bc00
commit 64b45399e5
2 changed files with 51 additions and 2 deletions

View File

@ -90,6 +90,7 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
extern "C" int pthread_equal(void *t1, void *t2);
extern "C" void *pthread_self();
extern "C" void _exit(int status);
#if !SANITIZER_NETBSD
@ -2392,8 +2393,11 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
// Names the thread whose ThreadState (`thr`) is stored in the interceptor
// context `ctx`, by forwarding to ThreadSetName.
#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
// Names the thread identified by the user-level handle `thread` by asking the
// thread registry to look it up (SetThreadNameByUserId).
// NOTE(review): this macro is defined a second time directly below; this
// definition looks like the pre-change side of a diff -- confirm that only one
// definition exists in the real file.
#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
__tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
// Names the thread identified by the pthread handle `thread`.
//
// Fast path: when `thread` is the calling thread (checked with pthread_equal
// against pthread_self()), the name is set directly through the interceptor
// context via COMMON_INTERCEPTOR_SET_THREAD_NAME. Otherwise the thread registry
// is asked to find the thread by its user id (SetThreadNameByUserId).
//
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement: a bare if/else here could pair with an `else` at the call site or
// misbehave inside an unbraced `if`. Call sites must end the use with `;`.
//
// NOTE(review): the parameter `ctx` is also substituted inside `__tsan::ctx`,
// so the argument text ends up after `__tsan::`; presumably every caller
// passes a variable literally named `ctx` -- confirm.
// `thread` appears twice in the expansion, so it should be free of side effects.
#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)           \
  do {                                                                   \
    if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
      COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name);                     \
    else                                                                 \
      __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name);  \
  } while (0)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)

View File

@ -0,0 +1,45 @@
// RUN: %clangxx_tsan %s -o %t
// RUN: %run %t 2>&1 | FileCheck %s
// bench.h needs pthread barriers which are not available on OS X
// UNSUPPORTED: darwin
#include "bench.h"
// Thread entry point that does no real work: it only gives the calling
// thread the name "nop_thread" and then finishes. `arg` is unused.
void *nop_thread(void *arg) {
  pthread_t self = pthread_self();
  pthread_setname_np(self, "nop_thread");
  return nullptr;
}
// Benchmark worker: creates one nop_thread and joins it, bench_niter times
// in a row. `tid` is unused.
void thread(int tid) {
  int completed = 0;
  while (completed < bench_niter) {
    pthread_t child;
    pthread_create(&child, nullptr, nop_thread, nullptr);
    pthread_join(child, nullptr);
    ++completed;
  }
}
// Measures thread creation/joining while many other threads exist, both
// still running and already joined.
//
// Phases:
//   1. 200 rounds of 100 threads that are created and joined right away.
//   2. 2000 threads that are created now and joined only at the very end.
//   3. bench_nthread benchmark workers running `thread`.
void bench() {
  const int kRounds = 200;
  const int kBatch = 100;
  const int kLiveThreads = 2000;

  printf("starting transient threads...\n");
  for (int round = 0; round < kRounds; ++round) {
    pthread_t batch[kBatch];
    int k = 0;
    while (k < kBatch) {
      pthread_create(&batch[k], nullptr, nop_thread, nullptr);
      ++k;
    }
    k = 0;
    while (k < kBatch) {
      pthread_join(batch[k], nullptr);
      ++k;
    }
  }

  printf("starting persistent threads...\n");
  pthread_t live[kLiveThreads];
  for (int k = 0; k < kLiveThreads; ++k) {
    pthread_create(&live[k], nullptr, nop_thread, nullptr);
  }

  printf("starting benchmark threads...\n");
  start_thread_group(bench_nthread, thread);

  // The persistent threads are joined only after the benchmark group was started.
  for (int k = 0; k < kLiveThreads; ++k) {
    pthread_join(live[k], nullptr);
  }
}
// CHECK: DONE