forked from OSchip/llvm-project
Use sched_getaffinity instead of std::thread::hardware_concurrency.
The issue with std::thread::hardware_concurrency is that it forwards
to libc and some implementations (like glibc) don't take thread
affinity into consideration.
With this change, an LLVM program that can execute on only 2 cores will
use 2 threads, even if the machine has 32 cores.
This makes benchmarking a lot easier, but should also help if someone
doesn't want to use all cores for compilation for example.
llvm-svn: 314809
This commit is contained in:
parent
c1f906c134
commit
6e182fbab4
|
@ -269,6 +269,7 @@ if( LLVM_USING_GLIBC )
|
||||||
add_definitions( -D_GNU_SOURCE )
|
add_definitions( -D_GNU_SOURCE )
|
||||||
endif()
|
endif()
|
||||||
# This check requires _GNU_SOURCE
|
# This check requires _GNU_SOURCE
|
||||||
|
check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY)
|
||||||
if(HAVE_LIBPTHREAD)
|
if(HAVE_LIBPTHREAD)
|
||||||
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
||||||
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
||||||
|
|
|
@ -185,6 +185,9 @@
|
||||||
/* Define to 1 if you have the `setenv' function. */
|
/* Define to 1 if you have the `setenv' function. */
|
||||||
#cmakedefine HAVE_SETENV ${HAVE_SETENV}
|
#cmakedefine HAVE_SETENV ${HAVE_SETENV}
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `sched_getaffinity' function. */
|
||||||
|
#cmakedefine HAVE_SCHED_GETAFFINITY ${HAVE_SCHED_GETAFFINITY}
|
||||||
|
|
||||||
/* Define to 1 if you have the `setrlimit' function. */
|
/* Define to 1 if you have the `setrlimit' function. */
|
||||||
#cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT}
|
#cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT}
|
||||||
|
|
||||||
|
|
|
@ -38,8 +38,8 @@ public:
|
||||||
using TaskTy = std::function<void()>;
|
using TaskTy = std::function<void()>;
|
||||||
using PackagedTaskTy = std::packaged_task<void()>;
|
using PackagedTaskTy = std::packaged_task<void()>;
|
||||||
|
|
||||||
/// Construct a pool with the number of core available on the system (or
|
/// Construct a pool with the number of threads found by
|
||||||
/// whatever the value returned by std::thread::hardware_concurrency() is).
|
/// hardware_concurrency().
|
||||||
ThreadPool();
|
ThreadPool();
|
||||||
|
|
||||||
/// Construct a pool of \p ThreadCount threads
|
/// Construct a pool of \p ThreadCount threads
|
||||||
|
|
|
@ -131,6 +131,14 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
|
||||||
/// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
|
/// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
|
||||||
unsigned heavyweight_hardware_concurrency();
|
unsigned heavyweight_hardware_concurrency();
|
||||||
|
|
||||||
|
/// Get the number of threads that the current program can execute
|
||||||
|
/// concurrently. On some systems std::thread::hardware_concurrency() returns
|
||||||
|
/// the total number of cores, without taking affinity into consideration.
|
||||||
|
/// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
|
||||||
|
/// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
|
||||||
|
/// not available.
|
||||||
|
unsigned hardware_concurrency();
|
||||||
|
|
||||||
/// \brief Return the current thread id, as used in various OS system calls.
|
/// \brief Return the current thread id, as used in various OS system calls.
|
||||||
/// Note that not all platforms guarantee that the value returned will be
|
/// Note that not all platforms guarantee that the value returned will be
|
||||||
/// unique across the entire system, so portable code should not assume
|
/// unique across the entire system, so portable code should not assume
|
||||||
|
|
|
@ -195,15 +195,7 @@ void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) {
|
||||||
Printf(FallbackFMT, PC);
|
Printf(FallbackFMT, PC);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned NumberOfCpuCores() {
|
unsigned NumberOfCpuCores() { return hardware_concurrency(); }
|
||||||
unsigned N = std::thread::hardware_concurrency();
|
|
||||||
if (!N) {
|
|
||||||
Printf("WARNING: std::thread::hardware_concurrency not well defined for "
|
|
||||||
"your platform. Assuming CPU count of 1.\n");
|
|
||||||
N = 1;
|
|
||||||
}
|
|
||||||
return N;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t SimpleFastHash(const uint8_t *Data, size_t Size) {
|
size_t SimpleFastHash(const uint8_t *Data, size_t Size) {
|
||||||
size_t Res = 0;
|
size_t Res = 0;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "llvm/Support/Parallel.h"
|
#include "llvm/Support/Parallel.h"
|
||||||
#include "llvm/Config/llvm-config.h"
|
#include "llvm/Config/llvm-config.h"
|
||||||
|
#include "llvm/Support/Threading.h"
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
@ -70,8 +71,7 @@ Executor *Executor::getDefaultExecutor() {
|
||||||
/// in filo order.
|
/// in filo order.
|
||||||
class ThreadPoolExecutor : public Executor {
|
class ThreadPoolExecutor : public Executor {
|
||||||
public:
|
public:
|
||||||
explicit ThreadPoolExecutor(
|
explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency())
|
||||||
unsigned ThreadCount = std::thread::hardware_concurrency())
|
|
||||||
: Done(ThreadCount) {
|
: Done(ThreadCount) {
|
||||||
// Spawn all but one of the threads in another thread as spawning threads
|
// Spawn all but one of the threads in another thread as spawning threads
|
||||||
// can take a while.
|
// can take a while.
|
||||||
|
|
|
@ -14,14 +14,15 @@
|
||||||
#include "llvm/Support/ThreadPool.h"
|
#include "llvm/Support/ThreadPool.h"
|
||||||
|
|
||||||
#include "llvm/Config/llvm-config.h"
|
#include "llvm/Config/llvm-config.h"
|
||||||
|
#include "llvm/Support/Threading.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
#if LLVM_ENABLE_THREADS
|
#if LLVM_ENABLE_THREADS
|
||||||
|
|
||||||
// Default to std::thread::hardware_concurrency
|
// Default to hardware_concurrency
|
||||||
ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
|
ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {}
|
||||||
|
|
||||||
ThreadPool::ThreadPool(unsigned ThreadCount)
|
ThreadPool::ThreadPool(unsigned ThreadCount)
|
||||||
: ActiveThreads(0), EnableFlag(true) {
|
: ActiveThreads(0), EnableFlag(true) {
|
||||||
|
|
|
@ -47,6 +47,8 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData,
|
||||||
|
|
||||||
unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
|
unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
|
||||||
|
|
||||||
|
unsigned llvm::hardware_concurrency() { return 1; }
|
||||||
|
|
||||||
uint64_t llvm::get_threadid() { return 0; }
|
uint64_t llvm::get_threadid() { return 0; }
|
||||||
|
|
||||||
uint32_t llvm::get_max_thread_name_length() { return 0; }
|
uint32_t llvm::get_max_thread_name_length() { return 0; }
|
||||||
|
@ -71,6 +73,18 @@ unsigned llvm::heavyweight_hardware_concurrency() {
|
||||||
return NumPhysical;
|
return NumPhysical;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of threads the current program can run concurrently.
///
/// When sched_getaffinity() is available, the count is derived from the CPU
/// affinity mask, so a process restricted to a subset of the machine's cores
/// reports only that subset. If sched_getaffinity() is unavailable, or the
/// query fails, this falls back to std::thread::hardware_concurrency().
///
/// \returns a thread count that is never 0.
unsigned llvm::hardware_concurrency() {
#ifdef HAVE_SCHED_GETAFFINITY
  cpu_set_t Set;
  // sched_getaffinity() returns 0 on success and -1 on error. Set is only
  // filled in on success, so it must not be read after a failed call.
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0) {
    const int Count = CPU_COUNT(&Set);
    // Never hand a zero thread count to callers that size pools with it.
    if (Count > 0)
      return static_cast<unsigned>(Count);
  }
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}
|
||||||
|
|
||||||
// Include the platform-specific parts of this class.
|
// Include the platform-specific parts of this class.
|
||||||
#ifdef LLVM_ON_UNIX
|
#ifdef LLVM_ON_UNIX
|
||||||
#include "Unix/Threading.inc"
|
#include "Unix/Threading.inc"
|
||||||
|
|
|
@ -211,8 +211,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
|
||||||
|
|
||||||
// If NumThreads is not specified, auto-detect a good default.
|
// If NumThreads is not specified, auto-detect a good default.
|
||||||
if (NumThreads == 0)
|
if (NumThreads == 0)
|
||||||
NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
|
NumThreads =
|
||||||
unsigned(Inputs.size() / 2)));
|
std::min(hardware_concurrency(), unsigned((Inputs.size() + 1) / 2));
|
||||||
|
|
||||||
// Initialize the writer contexts.
|
// Initialize the writer contexts.
|
||||||
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
|
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
|
||||||
|
|
Loading…
Reference in New Issue