tsan: speed up pthread_setname_np

pthread_setname_np does a linear search over all thread descriptors to map a pthread_t to its thread descriptor. This has O(N^2) complexity overall (each of N threads pays for an O(N) search) and becomes much worse in the new tsan runtime, which keeps every thread that has ever existed in the thread registry. Replace the linear search with direct access when pthread_setname_np is called for the current thread (a very common case). Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D113916
2021-11-15 19:00:31 +01:00 · 2021-11-15 19:00:31 +01:00 · 64b45399e5
parent b85f97bc00
commit 64b45399e5
2 changed files with 51 additions and 2 deletions
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@ -90,6 +90,7 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
 DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
 #if !SANITIZER_NETBSD
@ -2392,8 +2393,11 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
 #define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
  ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)

-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
-  __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)         \
+  if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
+    COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name);                     \
+  else                                                                 \
+    __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)

 #define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)

--- a/compiler-rt/test/tsan/bench_threads.cpp
+++ b/compiler-rt/test/tsan/bench_threads.cpp
@ -0,0 +1,45 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// bench.h needs pthread barriers which are not available on OS X
+// UNSUPPORTED: darwin
+
+#include "bench.h"
+
+// Thread entry point: names the calling thread "nop_thread" and returns
+// immediately. `arg` is unused; the result is always nullptr.
+void *nop_thread(void *arg) {
+  pthread_t self = pthread_self();
+  pthread_setname_np(self, "nop_thread");
+  return nullptr;
+}
+
+// Benchmark worker: performs bench_niter rounds, each creating a single
+// nop_thread and joining it before the next round starts. `tid` is unused.
+void thread(int tid) {
+  for (int round = 0; round < bench_niter; ++round) {
+    pthread_t child;
+    pthread_create(&child, nullptr, nop_thread, nullptr);
+    pthread_join(child, nullptr);
+  }
+}
+
+// Benchmarks thread creation/joining while a large number of other threads
+// (some already joined, some created but not yet joined) exist.
+void bench() {
+  const int kRounds = 200;
+  const int kBatch = 100;
+  const int kLiveThreads = 2000;
+
+  // Phase 1: kRounds batches of kBatch threads, each batch fully joined
+  // before the next one is created.
+  printf("starting transient threads...\n");
+  for (int round = 0; round < kRounds; ++round) {
+    pthread_t batch[kBatch];
+    for (pthread_t &t : batch)
+      pthread_create(&t, nullptr, nop_thread, nullptr);
+    for (pthread_t &t : batch)
+      pthread_join(t, nullptr);
+  }
+
+  // Phase 2: create kLiveThreads threads that are left unjoined until after
+  // the benchmark threads have been started.
+  printf("starting persistent threads...\n");
+  pthread_t live[kLiveThreads];
+  for (pthread_t &t : live)
+    pthread_create(&t, nullptr, nop_thread, nullptr);
+
+  // Phase 3: hand the measured workload to the bench.h harness.
+  // NOTE(review): presumably this runs bench_nthread instances of thread();
+  // confirm against bench.h.
+  printf("starting benchmark threads...\n");
+  start_thread_group(bench_nthread, thread);
+
+  for (pthread_t &t : live)
+    pthread_join(t, nullptr);
+}
+
+// CHECK: DONE