Use sched_getaffinity instead of std::thread::hardware_concurrency.

The issue with std::thread::hardware_concurrency is that it forwards to libc, and some implementations (like glibc) don't take thread affinity into consideration. With this change, an LLVM program that can execute on only 2 cores will use 2 threads, even if the machine has 32 cores. This makes benchmarking a lot easier, but should also help if someone doesn't want to use all cores for compilation, for example. llvm-svn: 314809
2017-10-03 16:25:15 +00:00 · 2017-10-03 16:25:15 +00:00 · 6e182fbab4
parent c1f906c134
commit 6e182fbab4
9 changed files with 36 additions and 17 deletions
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@ -269,6 +269,7 @@ if( LLVM_USING_GLIBC )
  add_definitions( -D_GNU_SOURCE )
 endif()
 # This check requires _GNU_SOURCE
 check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY)
 if(HAVE_LIBPTHREAD)
  check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
  check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@ -185,6 +185,9 @@
 /* Define to 1 if you have the `setenv' function. */
 #cmakedefine HAVE_SETENV ${HAVE_SETENV}
 /* Define to 1 if you have the `sched_getaffinity' function. */
 #cmakedefine HAVE_SCHED_GETAFFINITY ${HAVE_SCHED_GETAFFINITY}
 /* Define to 1 if you have the `setrlimit' function. */
 #cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT}
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@ -38,8 +38,8 @@ public:
  using TaskTy = std::function<void()>;
  using PackagedTaskTy = std::packaged_task<void()>;
-  /// Construct a pool with the number of core available on the system (or
+  /// Construct a pool with the number of threads found by
-  /// whatever the value returned by std::thread::hardware_concurrency() is).
+  /// hardware_concurrency().
  ThreadPool();
  /// Construct a pool of \p ThreadCount threads
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@ -131,6 +131,14 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
  unsigned heavyweight_hardware_concurrency();
  /// Get the number of threads that the current program can execute
  /// concurrently. On some systems std::thread::hardware_concurrency() returns
  /// the total number of cores, without taking affinity into consideration.
  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
  /// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
  /// not available.
  unsigned hardware_concurrency();
  /// \brief Return the current thread id, as used in various OS system calls.
  /// Note that not all platforms guarantee that the value returned will be
  /// unique across the entire system, so portable code should not assume
--- a/llvm/lib/Fuzzer/FuzzerUtil.cpp
+++ b/llvm/lib/Fuzzer/FuzzerUtil.cpp
@ -195,15 +195,7 @@ void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) {
    Printf(FallbackFMT, PC);
 }
-unsigned NumberOfCpuCores() {
+unsigned NumberOfCpuCores() { return hardware_concurrency(); }
-  unsigned N = std::thread::hardware_concurrency();
-  if (!N) {
-    Printf("WARNING: std::thread::hardware_concurrency not well defined for "
-           "your platform. Assuming CPU count of 1.\n");
-    N = 1;
-  }
-  return N;
-}
 size_t SimpleFastHash(const uint8_t *Data, size_t Size) {
  size_t Res = 0;
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@ -9,6 +9,7 @@
 #include "llvm/Support/Parallel.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Threading.h"
 #include <atomic>
 #include <stack>
@ -70,8 +71,7 @@ Executor *Executor::getDefaultExecutor() {
 ///   in filo order.
 class ThreadPoolExecutor : public Executor {
 public:
-  explicit ThreadPoolExecutor(
+  explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency())
-      unsigned ThreadCount = std::thread::hardware_concurrency())
      : Done(ThreadCount) {
    // Spawn all but one of the threads in another thread as spawning threads
    // can take a while.
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@ -14,14 +14,15 @@
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 #if LLVM_ENABLE_THREADS
-// Default to std::thread::hardware_concurrency
+// Default to hardware_concurrency
-ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
+ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {}
 ThreadPool::ThreadPool(unsigned ThreadCount)
    : ActiveThreads(0), EnableFlag(true) {
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@ -47,6 +47,8 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData,
 // Stub definitions that report a single hardware thread and no thread
 // identity. NOTE(review): presumably these sit in the LLVM_ENABLE_THREADS=OFF
 // branch of this file; the enclosing #if is outside this hunk, so confirm.
 // Always reports one thread.
 unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
 // Always reports one thread; no affinity or std::thread query is made here.
 unsigned llvm::hardware_concurrency() { return 1; }
 // Always reports thread id 0.
 uint64_t llvm::get_threadid() { return 0; }
 // Reports a maximum thread-name length of 0.
 uint32_t llvm::get_max_thread_name_length() { return 0; }
@ -71,6 +73,18 @@ unsigned llvm::heavyweight_hardware_concurrency() {
  return NumPhysical;
 }
 /// Returns the number of threads the current program can execute
 /// concurrently.
 ///
 /// When sched_getaffinity is available (HAVE_SCHED_GETAFFINITY), the result
 /// is the number of CPUs in the calling thread's affinity mask, so a process
 /// restricted to a subset of cores reports only that subset. If the affinity
 /// query is unavailable or fails, this falls back to
 /// std::thread::hardware_concurrency(), and to 1 if that reports 0.
 unsigned llvm::hardware_concurrency() {
 #ifdef HAVE_SCHED_GETAFFINITY
  cpu_set_t Set;
  // sched_getaffinity returns 0 on success and -1 on error. Only read the
  // mask when the call succeeded; on failure Set is not filled in, so fall
  // through to the portable query below.
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
 #endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
 }
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Threading.inc"
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@ -211,8 +211,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
  // If NumThreads is not specified, auto-detect a good default.
  if (NumThreads == 0)
-    NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
+    NumThreads =
-                                       unsigned(Inputs.size() / 2)));
+        std::min(hardware_concurrency(), unsigned((Inputs.size() + 1) / 2));
  // Initialize the writer contexts.
  SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;