[llvm-cov] Prevent llvm-cov from using too many threads

As reported here: https://reviews.llvm.org/D75153#1987272 Before this patch, each instance of llvm-cov was creating one thread per hardware core, which was probably unnecessary because the number of inputs was small. This was probably causing a thread rlimit issue on systems with a large core count. After this patch, the previous behavior (from before rG8404aeb5) is restored: if --num-threads is not specified, we create one thread per input, up to num.cores. When specified, --num-threads can request any number of threads, with no upper limit. Differential Revision: https://reviews.llvm.org/D78408
2020-04-24 15:28:01 -04:00 · 2020-04-24 15:28:01 -04:00 · 0e13a0331f
parent 0e2bd49370
commit 0e13a0331f
6 changed files with 43 additions and 25 deletions
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@ -157,6 +157,10 @@ void llvm_execute_on_thread_async(
    // std::thread per core.
    bool UseHyperThreads = true;

+    // If set, will constrain 'ThreadsRequested' to the number of hardware
+    // threads, or hardware cores.
+    bool Limit = false;
+
    /// Retrieves the max available threads for the current strategy. This
    /// accounts for affinity masks and takes advantage of all CPU sockets.
    unsigned compute_thread_count() const;
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@ -84,14 +84,15 @@ void llvm::llvm_execute_on_thread_async(
 int computeHostNumHardwareThreads();

 unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
-  if (ThreadsRequested > 0)
-    return ThreadsRequested;
-
  int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads()
                                       : sys::getHostNumPhysicalCores();
  if (MaxThreadCount <= 0)
    MaxThreadCount = 1;
-  return MaxThreadCount;
+  if (ThreadsRequested == 0)
+    return MaxThreadCount;
+  if (!Limit)
+    return ThreadsRequested;
+  return std::min((unsigned)MaxThreadCount, ThreadsRequested);
 }

 namespace {
--- a/llvm/tools/dsymutil/dsymutil.cpp
+++ b/llvm/tools/dsymutil/dsymutil.cpp
@ -547,10 +547,14 @@ int main(int argc, char **argv) {
    // Shared a single binary holder for all the link steps.
    BinaryHolder BinHolder;

-    unsigned ThreadCount = Options.LinkOpts.Threads;
-    if (!ThreadCount)
-      ThreadCount = DebugMapPtrsOrErr->size();
-    ThreadPool Threads(hardware_concurrency(ThreadCount));
+    ThreadPoolStrategy S = hardware_concurrency(Options.LinkOpts.Threads);
+    if (Options.LinkOpts.Threads == 0) {
+      // If NumThreads is not specified, create one thread for each input, up to
+      // the number of hardware threads.
+      S.ThreadsRequested = DebugMapPtrsOrErr->size();
+      S.Limit = true;
+    }
+    ThreadPool Threads(S);

    // If there is more than one link to execute, we need to generate
    // temporary files.
@ -625,7 +629,7 @@ int main(int argc, char **argv) {
      // FIXME: The DwarfLinker can have some very deep recursion that can max
      // out the (significantly smaller) stack when using threads. We don't
      // want this limitation when we only have a single thread.
-      if (ThreadCount == 1)
+      if (S.ThreadsRequested == 1)
        LinkLambda(OS, Options.LinkOpts);
      else
        Threads.async(LinkLambda, OS, Options.LinkOpts);
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@ -943,19 +943,21 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
      (SourceFiles.size() != 1) || ViewOpts.hasOutputDirectory() ||
      (ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML);

-  auto NumThreads = ViewOpts.NumThreads;
+  ThreadPoolStrategy S = hardware_concurrency(ViewOpts.NumThreads);
+  if (ViewOpts.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(SourceFiles.size());
+    S.Limit = true;
+  }

-  // If NumThreads is not specified, auto-detect a good default.
-  if (NumThreads == 0)
-    NumThreads = SourceFiles.size();
-
-  if (!ViewOpts.hasOutputDirectory() || NumThreads == 1) {
+  if (!ViewOpts.hasOutputDirectory() || S.ThreadsRequested == 1) {
    for (const std::string &SourceFile : SourceFiles)
      writeSourceFileView(SourceFile, Coverage.get(), Printer.get(),
                          ShowFilenames);
  } else {
    // In -output-dir mode, it's safe to use multiple threads to print files.
-    ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+    ThreadPool Pool(S);
    for (const std::string &SourceFile : SourceFiles)
      Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
                 Coverage.get(), Printer.get(), ShowFilenames);
--- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@ -163,10 +163,14 @@ json::Array renderFiles(const coverage::CoverageMapping &Coverage,
                        ArrayRef<std::string> SourceFiles,
                        ArrayRef<FileCoverageSummary> FileReports,
                        const CoverageViewOptions &Options) {
-  auto NumThreads = Options.NumThreads;
-  if (NumThreads == 0)
-    NumThreads = SourceFiles.size();
-  ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+  ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
+  if (Options.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(SourceFiles.size());
+    S.Limit = true;
+  }
+  ThreadPool Pool(S);
  json::Array FileArray;
  std::mutex FileArrayMutex;

--- a/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/llvm/tools/llvm-cov/CoverageReport.cpp
@ -352,12 +352,15 @@ std::vector<FileCoverageSummary> CoverageReport::prepareFileReports(
    ArrayRef<std::string> Files, const CoverageViewOptions &Options,
    const CoverageFilter &Filters) {
  unsigned LCP = getRedundantPrefixLen(Files);
-  auto NumThreads = Options.NumThreads;

-  // If NumThreads is not specified, auto-detect a good default.
-  if (NumThreads == 0)
-    NumThreads = Files.size();
-  ThreadPool Pool(heavyweight_hardware_concurrency(NumThreads));
+  ThreadPoolStrategy S = hardware_concurrency(Options.NumThreads);
+  if (Options.NumThreads == 0) {
+    // If NumThreads is not specified, create one thread for each input, up to
+    // the number of hardware cores.
+    S = heavyweight_hardware_concurrency(Files.size());
+    S.Limit = true;
+  }
+  ThreadPool Pool(S);

  std::vector<FileCoverageSummary> FileReports;
  FileReports.reserve(Files.size());