[llvm-cov] Prevent llvm-cov from using too many threads

As reported here: https://reviews.llvm.org/D75153#1987272

Before, each instance of llvm-cov was creating one thread per hardware core, which probably wasn't needed because the number of inputs was small. This was probably causing a thread rlimit issue on systems with a large core count.

After this patch, the previous behavior (as it was before rG8404aeb5) is restored:

If --num-threads is not specified, we create one thread per input, up to the number of hardware cores.
When specified, --num-threads sets the exact number of threads to use, with no upper limit.

Differential Revision: https://reviews.llvm.org/D78408
This commit is contained in:
Alexandre Ganea 2020-04-24 15:28:01 -04:00
parent 0e2bd49370
commit 0e13a0331f
6 changed files with 43 additions and 25 deletions

View File

@ -157,6 +157,10 @@ void llvm_execute_on_thread_async(
// std::thread per core.
bool UseHyperThreads = true;
// If set, will constrain 'ThreadsRequested' to the number of hardware
// threads, or hardware cores.
bool Limit = false;
/// Retrieves the max available threads for the current strategy. This
/// accounts for affinity masks and takes advantage of all CPU sockets.
unsigned compute_thread_count() const;

View File

@ -84,14 +84,15 @@ void llvm::llvm_execute_on_thread_async(
int computeHostNumHardwareThreads();
unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
if (ThreadsRequested > 0)
return ThreadsRequested;
int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads()
: sys::getHostNumPhysicalCores();
if (MaxThreadCount <= 0)
MaxThreadCount = 1;
return MaxThreadCount;
if (ThreadsRequested == 0)
return MaxThreadCount;
if (!Limit)
return ThreadsRequested;
return std::min((unsigned)MaxThreadCount, ThreadsRequested);
}
namespace {

View File

@ -547,10 +547,14 @@ int main(int argc, char **argv) {
// Shared a single binary holder for all the link steps.
BinaryHolder BinHolder;
unsigned ThreadCount = Options.LinkOpts.Threads;
if (!ThreadCount)
ThreadCount = DebugMapPtrsOrErr->size();
ThreadPool Threads(hardware_concurrency(ThreadCount));
ThreadPoolStrategy S = hardware_concurrency(Options.LinkOpts.Threads);
if (Options.LinkOpts.Threads == 0) {
// If NumThreads is not specified, create one thread for each input, up to
// the number of hardware threads.
S.ThreadsRequested = DebugMapPtrsOrErr->size();
S.Limit = true;
}
ThreadPool Threads(S);
// If there is more than one link to execute, we need to generate
// temporary files.
@ -625,7 +629,7 @@ int main(int argc, char **argv) {
// FIXME: The DwarfLinker can have some very deep recursion that can max
// out the (significantly smaller) stack when using threads. We don't
// want this limitation when we only have a single thread.
if (ThreadCount == 1)
if (S.ThreadsRequested == 1)
LinkLambda(OS, Options.LinkOpts);
else
Threads.async(LinkLambda, OS, Options.LinkOpts);

View File

@ -943,19 +943,21 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
(SourceFiles.size() != 1) || ViewOpts.hasOutputDirectory() ||
(ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML);
auto NumThreads = ViewOpts.NumThreads;
ThreadPoolStrategy S = hardware_concurrency(ViewOpts.NumThreads);
if (ViewOpts.NumThreads == 0) {
// If NumThreads is not specified, create one thread for each input, up to
// the number of hardware cores.
S = heavyweight_hardware_concurrency(SourceFiles.size());
S.Limit = true;
}
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
NumThreads = SourceFiles.size();
if (!ViewOpts.hasOutputDirectory() || NumThreads == 1) {
if (!ViewOpts.hasOutputDirectory() || S.ThreadsRequested == 1) {
for (const std::string &SourceFile : SourceFiles)
writeSourceFileView(SourceFile, Coverage.get(), Printer.get(),
ShowFilenames);
} else {
// In -output-dir mode, it's safe to use multiple threads to print files.
ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
ThreadPool Pool(S);
for (const std::string &SourceFile : SourceFiles)
Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
Coverage.get(), Printer.get(), ShowFilenames);

View File

@ -163,10 +163,14 @@ json::Array renderFiles(const coverage::CoverageMapping &Coverage,
ArrayRef<std::string> SourceFiles,
ArrayRef<FileCoverageSummary> FileReports,
const CoverageViewOptions &Options) {
auto NumThreads = Options.NumThreads;
if (NumThreads == 0)
NumThreads = SourceFiles.size();
ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
if (Options.NumThreads == 0) {
// If NumThreads is not specified, create one thread for each input, up to
// the number of hardware cores.
S = heavyweight_hardware_concurrency(SourceFiles.size());
S.Limit = true;
}
ThreadPool Pool(S);
json::Array FileArray;
std::mutex FileArrayMutex;

View File

@ -352,12 +352,15 @@ std::vector<FileCoverageSummary> CoverageReport::prepareFileReports(
ArrayRef<std::string> Files, const CoverageViewOptions &Options,
const CoverageFilter &Filters) {
unsigned LCP = getRedundantPrefixLen(Files);
auto NumThreads = Options.NumThreads;
// If NumThreads is not specified, auto-detect a good default.
if (NumThreads == 0)
NumThreads = Files.size();
ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
if (Options.NumThreads == 0) {
// If NumThreads is not specified, create one thread for each input, up to
// the number of hardware cores.
S = heavyweight_hardware_concurrency(Files.size());
S.Limit = true;
}
ThreadPool Pool(S);
std::vector<FileCoverageSummary> FileReports;
FileReports.reserve(Files.size());