2019-06-13 05:45:28 +08:00
|
|
|
//===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
|
2019-06-13 05:32:49 +08:00
|
|
|
//
|
2019-06-13 05:45:28 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2019-06-13 05:32:49 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "clang/Frontend/CompilerInstance.h"
|
|
|
|
#include "clang/Tooling/CommonOptionsParser.h"
|
2019-08-07 04:43:25 +08:00
|
|
|
#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
|
2019-10-22 13:05:18 +08:00
|
|
|
#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
|
2019-06-27 05:11:51 +08:00
|
|
|
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
|
2019-06-13 05:32:49 +08:00
|
|
|
#include "clang/Tooling/JSONCompilationDatabase.h"
|
2019-11-15 06:47:11 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
#include "llvm/Support/FileUtilities.h"
|
2019-06-13 05:32:49 +08:00
|
|
|
#include "llvm/Support/InitLLVM.h"
|
2019-10-29 05:26:45 +08:00
|
|
|
#include "llvm/Support/JSON.h"
|
2019-06-13 05:32:49 +08:00
|
|
|
#include "llvm/Support/Program.h"
|
|
|
|
#include "llvm/Support/Signals.h"
|
2020-02-14 05:53:25 +08:00
|
|
|
#include "llvm/Support/ThreadPool.h"
|
2019-06-13 05:32:49 +08:00
|
|
|
#include "llvm/Support/Threading.h"
|
2019-06-13 05:52:36 +08:00
|
|
|
#include <mutex>
|
2019-06-13 05:32:49 +08:00
|
|
|
#include <thread>
|
|
|
|
|
|
|
|
using namespace clang;
|
2019-06-27 05:11:51 +08:00
|
|
|
using namespace tooling::dependencies;
|
2019-06-13 05:32:49 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2019-06-22 02:24:55 +08:00
|
|
|
class SharedStream {
|
|
|
|
public:
|
|
|
|
SharedStream(raw_ostream &OS) : OS(OS) {}
|
|
|
|
void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
|
|
|
|
std::unique_lock<std::mutex> LockGuard(Lock);
|
|
|
|
Fn(OS);
|
|
|
|
OS.flush();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::mutex Lock;
|
|
|
|
raw_ostream &OS;
|
|
|
|
};
|
|
|
|
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
class ResourceDirectoryCache {
|
|
|
|
public:
|
|
|
|
/// findResourceDir finds the resource directory relative to the clang
|
|
|
|
/// compiler being used in Args, by running it with "-print-resource-dir"
|
|
|
|
/// option and cache the results for reuse. \returns resource directory path
|
|
|
|
/// associated with the given invocation command or empty string if the
|
|
|
|
/// compiler path is NOT an absolute path.
|
|
|
|
StringRef findResourceDir(const tooling::CommandLineArguments &Args) {
|
|
|
|
if (Args.size() < 1)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
const std::string &ClangBinaryPath = Args[0];
|
|
|
|
if (!llvm::sys::path::is_absolute(ClangBinaryPath))
|
|
|
|
return "";
|
|
|
|
|
|
|
|
const std::string &ClangBinaryName =
|
2020-01-29 03:23:46 +08:00
|
|
|
std::string(llvm::sys::path::filename(ClangBinaryPath));
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
|
|
|
|
std::unique_lock<std::mutex> LockGuard(CacheLock);
|
|
|
|
const auto &CachedResourceDir = Cache.find(ClangBinaryPath);
|
|
|
|
if (CachedResourceDir != Cache.end())
|
|
|
|
return CachedResourceDir->second;
|
|
|
|
|
|
|
|
std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName,
|
|
|
|
"-print-resource-dir"};
|
|
|
|
llvm::SmallString<64> OutputFile, ErrorFile;
|
|
|
|
llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
|
|
|
|
"" /*no-suffix*/, OutputFile);
|
|
|
|
llvm::sys::fs::createTemporaryFile("print-resource-dir-error",
|
|
|
|
"" /*no-suffix*/, ErrorFile);
|
|
|
|
llvm::FileRemover OutputRemover(OutputFile.c_str());
|
|
|
|
llvm::FileRemover ErrorRemover(ErrorFile.c_str());
|
|
|
|
llvm::Optional<StringRef> Redirects[] = {
|
|
|
|
{""}, // Stdin
|
|
|
|
StringRef(OutputFile),
|
|
|
|
StringRef(ErrorFile),
|
|
|
|
};
|
|
|
|
if (const int RC = llvm::sys::ExecuteAndWait(
|
|
|
|
ClangBinaryPath, PrintResourceDirArgs, {}, Redirects)) {
|
|
|
|
auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str());
|
|
|
|
llvm::errs() << ErrorBuf.get()->getBuffer();
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str());
|
|
|
|
if (!OutputBuf)
|
|
|
|
return "";
|
|
|
|
StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n');
|
|
|
|
|
|
|
|
Cache[ClangBinaryPath] = Output.str();
|
|
|
|
return Cache[ClangBinaryPath];
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::map<std::string, std::string> Cache;
|
|
|
|
std::mutex CacheLock;
|
|
|
|
};
|
|
|
|
|
2019-06-13 05:32:49 +08:00
|
|
|
// Command-line options. Everything is grouped under
// DependencyScannerCategory so that -help only shows flags for this tool.

llvm::cl::opt<bool> Help("h", llvm::cl::desc("Alias for -help"),
                         llvm::cl::Hidden);

llvm::cl::OptionCategory DependencyScannerCategory("Tool options");

// Whether scanning preprocesses minimized sources or the unmodified ones.
static llvm::cl::opt<ScanningMode> ScanMode(
    "mode",
    llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
    llvm::cl::values(
        clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
                   "preprocess-minimized-sources",
                   "The set of dependencies is computed by preprocessing the "
                   "source files that were minimized to only include the "
                   "contents that might affect the dependencies"),
        clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
                   "The set of dependencies is computed by preprocessing the "
                   "unmodified source files")),
    llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing),
    llvm::cl::cat(DependencyScannerCategory));

// Output format: classic Makefile deps or the experimental full JSON graph.
static llvm::cl::opt<ScanningOutputFormat> Format(
    "format", llvm::cl::desc("The output format for the dependencies"),
    llvm::cl::values(clEnumValN(ScanningOutputFormat::Make, "make",
                                "Makefile compatible dep file"),
                     clEnumValN(ScanningOutputFormat::Full, "experimental-full",
                                "Full dependency graph suitable"
                                " for explicitly building modules. This format "
                                "is experimental and will change.")),
    llvm::cl::init(ScanningOutputFormat::Make),
    llvm::cl::cat(DependencyScannerCategory));

static llvm::cl::opt<bool> FullCommandLine(
    "full-command-line",
    llvm::cl::desc("Include the full command lines to use to build modules"),
    llvm::cl::init(false), llvm::cl::cat(DependencyScannerCategory));

// 0 means "use all hardware threads".
llvm::cl::opt<unsigned>
    NumThreads("j", llvm::cl::Optional,
               llvm::cl::desc("Number of worker threads to use (default: use "
                              "all concurrent threads)"),
               llvm::cl::init(0), llvm::cl::cat(DependencyScannerCategory));

llvm::cl::opt<std::string>
    CompilationDB("compilation-database",
                  llvm::cl::desc("Compilation database"), llvm::cl::Required,
                  llvm::cl::cat(DependencyScannerCategory));

llvm::cl::opt<bool> ReuseFileManager(
    "reuse-filemanager",
    llvm::cl::desc("Reuse the file manager and its cache between invocations."),
    llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));

llvm::cl::opt<bool> SkipExcludedPPRanges(
    "skip-excluded-pp-ranges",
    llvm::cl::desc(
        "Use the preprocessor optimization that skips excluded conditionals by "
        "bumping the buffer pointer in the lexer instead of lexing the tokens "
        "until reaching the end directive."),
    llvm::cl::init(true), llvm::cl::cat(DependencyScannerCategory));

llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional,
                            llvm::cl::desc("Use verbose output."),
                            llvm::cl::init(false),
                            llvm::cl::cat(DependencyScannerCategory));
|
|
|
|
|
2019-06-13 05:32:49 +08:00
|
|
|
} // end anonymous namespace
|
|
|
|
|
2019-09-12 08:48:45 +08:00
|
|
|
/// \returns object-file path derived from source-file path.
|
|
|
|
/// \returns object-file path derived from source-file path, i.e. the source
/// path with its extension replaced by "o".
static std::string getObjFilePath(StringRef SrcFile) {
  SmallString<128> ObjPath(SrcFile);
  llvm::sys::path::replace_extension(ObjPath, "o");
  return ObjPath.str().str();
}
|
|
|
|
|
2019-10-31 05:04:11 +08:00
|
|
|
class SingleCommandCompilationDatabase : public tooling::CompilationDatabase {
|
|
|
|
public:
|
|
|
|
SingleCommandCompilationDatabase(tooling::CompileCommand Cmd)
|
|
|
|
: Command(std::move(Cmd)) {}
|
|
|
|
|
2020-07-21 10:52:36 +08:00
|
|
|
std::vector<tooling::CompileCommand>
|
|
|
|
getCompileCommands(StringRef FilePath) const override {
|
2019-10-31 05:04:11 +08:00
|
|
|
return {Command};
|
|
|
|
}
|
|
|
|
|
2020-07-21 10:52:36 +08:00
|
|
|
std::vector<tooling::CompileCommand> getAllCompileCommands() const override {
|
2019-10-31 05:04:11 +08:00
|
|
|
return {Command};
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
tooling::CompileCommand Command;
|
|
|
|
};
|
|
|
|
|
2019-10-22 13:05:18 +08:00
|
|
|
/// Takes the result of a dependency scan and prints error / dependency files
|
|
|
|
/// based on the result.
|
|
|
|
///
|
|
|
|
/// \returns True on error.
|
2019-10-29 05:26:45 +08:00
|
|
|
static bool
|
|
|
|
handleMakeDependencyToolResult(const std::string &Input,
|
|
|
|
llvm::Expected<std::string> &MaybeFile,
|
|
|
|
SharedStream &OS, SharedStream &Errs) {
|
2019-10-22 13:05:18 +08:00
|
|
|
if (!MaybeFile) {
|
|
|
|
llvm::handleAllErrors(
|
|
|
|
MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) {
|
|
|
|
Errs.applyLocked([&](raw_ostream &OS) {
|
|
|
|
OS << "Error while scanning dependencies for " << Input << ":\n";
|
|
|
|
OS << Err.getMessage();
|
|
|
|
});
|
|
|
|
});
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; });
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-10-29 05:26:45 +08:00
|
|
|
/// Converts a string set into a JSON array with the keys in sorted order,
/// so the output is deterministic.
static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
  std::vector<llvm::StringRef> Keys;
  Keys.reserve(Set.size());
  for (const auto &Entry : Set)
    Keys.push_back(Entry.getKey());
  llvm::sort(Keys);
  return llvm::json::Array(Keys);
}
|
|
|
|
|
|
|
|
/// Converts a list of module dependencies into a JSON array of
/// {module-name, context-hash} objects, sorted by (name, hash) so the output
/// is deterministic.
static llvm::json::Array toJSONSorted(std::vector<ClangModuleDep> V) {
  // Order by module name first, context hash second.
  auto Key = [](const ClangModuleDep &D) {
    return std::tie(D.ModuleName, D.ContextHash);
  };
  llvm::sort(V, [&Key](const ClangModuleDep &A, const ClangModuleDep &B) {
    return Key(A) < Key(B);
  });

  llvm::json::Array Ret;
  for (const ClangModuleDep &CMD : V)
    Ret.push_back(llvm::json::Object(
        {{"module-name", CMD.ModuleName}, {"context-hash", CMD.ContextHash}}));
  return Ret;
}
|
|
|
|
|
|
|
|
// Thread safe.
|
|
|
|
class FullDeps {
|
|
|
|
public:
|
|
|
|
void mergeDeps(StringRef Input, FullDependenciesResult FDR,
|
|
|
|
size_t InputIndex) {
|
|
|
|
const FullDependencies &FD = FDR.FullDeps;
|
|
|
|
|
|
|
|
InputDeps ID;
|
2020-01-29 03:23:46 +08:00
|
|
|
ID.FileName = std::string(Input);
|
2019-10-29 05:26:45 +08:00
|
|
|
ID.ContextHash = std::move(FD.ContextHash);
|
|
|
|
ID.FileDeps = std::move(FD.FileDeps);
|
|
|
|
ID.ModuleDeps = std::move(FD.ClangModuleDeps);
|
|
|
|
|
|
|
|
std::unique_lock<std::mutex> ul(Lock);
|
|
|
|
for (const ModuleDeps &MD : FDR.DiscoveredModules) {
|
|
|
|
auto I = Modules.find({MD.ContextHash, MD.ModuleName, 0});
|
|
|
|
if (I != Modules.end()) {
|
|
|
|
I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
Modules.insert(
|
|
|
|
I, {{MD.ContextHash, MD.ModuleName, InputIndex}, std::move(MD)});
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FullCommandLine)
|
|
|
|
ID.AdditonalCommandLine = FD.getAdditionalCommandLine(
|
|
|
|
[&](ClangModuleDep CMD) { return lookupPCMPath(CMD); },
|
|
|
|
[&](ClangModuleDep CMD) -> const ModuleDeps & {
|
|
|
|
return lookupModuleDeps(CMD);
|
|
|
|
});
|
|
|
|
|
|
|
|
Inputs.push_back(std::move(ID));
|
|
|
|
}
|
|
|
|
|
|
|
|
void printFullOutput(raw_ostream &OS) {
|
|
|
|
// Sort the modules by name to get a deterministic order.
|
|
|
|
std::vector<ContextModulePair> ModuleNames;
|
|
|
|
for (auto &&M : Modules)
|
|
|
|
ModuleNames.push_back(M.first);
|
2020-03-29 02:19:55 +08:00
|
|
|
llvm::sort(ModuleNames,
|
|
|
|
[](const ContextModulePair &A, const ContextModulePair &B) {
|
|
|
|
return std::tie(A.ModuleName, A.InputIndex) <
|
|
|
|
std::tie(B.ModuleName, B.InputIndex);
|
|
|
|
});
|
|
|
|
|
|
|
|
llvm::sort(Inputs, [](const InputDeps &A, const InputDeps &B) {
|
|
|
|
return A.FileName < B.FileName;
|
|
|
|
});
|
2019-10-29 05:26:45 +08:00
|
|
|
|
|
|
|
using namespace llvm::json;
|
|
|
|
|
|
|
|
Array OutModules;
|
|
|
|
for (auto &&ModName : ModuleNames) {
|
|
|
|
auto &MD = Modules[ModName];
|
|
|
|
Object O{
|
|
|
|
{"name", MD.ModuleName},
|
|
|
|
{"context-hash", MD.ContextHash},
|
|
|
|
{"file-deps", toJSONSorted(MD.FileDeps)},
|
|
|
|
{"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
|
|
|
|
{"clang-modulemap-file", MD.ClangModuleMapFile},
|
|
|
|
{"command-line",
|
|
|
|
FullCommandLine
|
|
|
|
? MD.getFullCommandLine(
|
|
|
|
[&](ClangModuleDep CMD) { return lookupPCMPath(CMD); },
|
|
|
|
[&](ClangModuleDep CMD) -> const ModuleDeps & {
|
|
|
|
return lookupModuleDeps(CMD);
|
|
|
|
})
|
|
|
|
: MD.NonPathCommandLine},
|
|
|
|
};
|
|
|
|
OutModules.push_back(std::move(O));
|
|
|
|
}
|
|
|
|
|
|
|
|
Array TUs;
|
|
|
|
for (auto &&I : Inputs) {
|
|
|
|
Object O{
|
|
|
|
{"input-file", I.FileName},
|
|
|
|
{"clang-context-hash", I.ContextHash},
|
|
|
|
{"file-deps", I.FileDeps},
|
|
|
|
{"clang-module-deps", toJSONSorted(I.ModuleDeps)},
|
|
|
|
{"command-line", I.AdditonalCommandLine},
|
|
|
|
};
|
|
|
|
TUs.push_back(std::move(O));
|
|
|
|
}
|
|
|
|
|
|
|
|
Object Output{
|
|
|
|
{"modules", std::move(OutModules)},
|
|
|
|
{"translation-units", std::move(TUs)},
|
|
|
|
};
|
|
|
|
|
|
|
|
OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
StringRef lookupPCMPath(ClangModuleDep CMD) {
|
|
|
|
return Modules[ContextModulePair{CMD.ContextHash, CMD.ModuleName, 0}]
|
|
|
|
.ImplicitModulePCMPath;
|
|
|
|
}
|
|
|
|
|
|
|
|
const ModuleDeps &lookupModuleDeps(ClangModuleDep CMD) {
|
|
|
|
auto I =
|
|
|
|
Modules.find(ContextModulePair{CMD.ContextHash, CMD.ModuleName, 0});
|
|
|
|
assert(I != Modules.end());
|
|
|
|
return I->second;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ContextModulePair {
|
|
|
|
std::string ContextHash;
|
|
|
|
std::string ModuleName;
|
|
|
|
mutable size_t InputIndex;
|
|
|
|
|
|
|
|
bool operator==(const ContextModulePair &Other) const {
|
|
|
|
return ContextHash == Other.ContextHash && ModuleName == Other.ModuleName;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ContextModulePairHasher {
|
|
|
|
std::size_t operator()(const ContextModulePair &CMP) const {
|
|
|
|
using llvm::hash_combine;
|
|
|
|
|
|
|
|
return hash_combine(CMP.ContextHash, CMP.ModuleName);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct InputDeps {
|
|
|
|
std::string FileName;
|
|
|
|
std::string ContextHash;
|
|
|
|
std::vector<std::string> FileDeps;
|
|
|
|
std::vector<ClangModuleDep> ModuleDeps;
|
|
|
|
std::vector<std::string> AdditonalCommandLine;
|
|
|
|
};
|
|
|
|
|
|
|
|
std::mutex Lock;
|
|
|
|
std::unordered_map<ContextModulePair, ModuleDeps, ContextModulePairHasher>
|
|
|
|
Modules;
|
|
|
|
std::vector<InputDeps> Inputs;
|
|
|
|
};
|
|
|
|
|
|
|
|
static bool handleFullDependencyToolResult(
|
|
|
|
const std::string &Input,
|
|
|
|
llvm::Expected<FullDependenciesResult> &MaybeFullDeps, FullDeps &FD,
|
|
|
|
size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
|
|
|
|
if (!MaybeFullDeps) {
|
|
|
|
llvm::handleAllErrors(
|
|
|
|
MaybeFullDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) {
|
|
|
|
Errs.applyLocked([&](raw_ostream &OS) {
|
|
|
|
OS << "Error while scanning dependencies for " << Input << ":\n";
|
|
|
|
OS << Err.getMessage();
|
|
|
|
});
|
|
|
|
});
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
FD.mergeDeps(Input, std::move(*MaybeFullDeps), InputIndex);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-06-13 05:32:49 +08:00
|
|
|
int main(int argc, const char **argv) {
|
|
|
|
llvm::InitLLVM X(argc, argv);
|
|
|
|
llvm::cl::HideUnrelatedOptions(DependencyScannerCategory);
|
|
|
|
if (!llvm::cl::ParseCommandLineOptions(argc, argv))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
std::string ErrorMessage;
|
|
|
|
std::unique_ptr<tooling::JSONCompilationDatabase> Compilations =
|
|
|
|
tooling::JSONCompilationDatabase::loadFromFile(
|
|
|
|
CompilationDB, ErrorMessage,
|
|
|
|
tooling::JSONCommandLineSyntax::AutoDetect);
|
|
|
|
if (!Compilations) {
|
|
|
|
llvm::errs() << "error: " << ErrorMessage << "\n";
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::cl::PrintOptionValues();
|
|
|
|
|
|
|
|
// The command options are rewritten to run Clang in preprocessor only mode.
|
|
|
|
auto AdjustingCompilations =
|
2019-08-15 07:04:18 +08:00
|
|
|
std::make_unique<tooling::ArgumentsAdjustingCompilations>(
|
2019-06-13 05:32:49 +08:00
|
|
|
std::move(Compilations));
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
ResourceDirectoryCache ResourceDirCache;
|
2019-06-13 05:32:49 +08:00
|
|
|
AdjustingCompilations->appendArgumentsAdjuster(
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
[&ResourceDirCache](const tooling::CommandLineArguments &Args,
|
|
|
|
StringRef FileName) {
|
2019-09-12 08:48:45 +08:00
|
|
|
std::string LastO = "";
|
|
|
|
bool HasMT = false;
|
|
|
|
bool HasMQ = false;
|
|
|
|
bool HasMD = false;
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
bool HasResourceDir = false;
|
2019-09-12 08:48:45 +08:00
|
|
|
// We need to find the last -o value.
|
|
|
|
if (!Args.empty()) {
|
|
|
|
std::size_t Idx = Args.size() - 1;
|
|
|
|
for (auto It = Args.rbegin(); It != Args.rend(); ++It) {
|
|
|
|
if (It != Args.rbegin()) {
|
|
|
|
if (Args[Idx] == "-o")
|
|
|
|
LastO = Args[Idx + 1];
|
|
|
|
if (Args[Idx] == "-MT")
|
|
|
|
HasMT = true;
|
|
|
|
if (Args[Idx] == "-MQ")
|
|
|
|
HasMQ = true;
|
|
|
|
if (Args[Idx] == "-MD")
|
|
|
|
HasMD = true;
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
if (Args[Idx] == "-resource-dir")
|
|
|
|
HasResourceDir = true;
|
2019-09-12 08:48:45 +08:00
|
|
|
}
|
|
|
|
--Idx;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If there's no -MT/-MQ Driver would add -MT with the value of the last
|
|
|
|
// -o option.
|
2019-06-13 05:32:49 +08:00
|
|
|
tooling::CommandLineArguments AdjustedArgs = Args;
|
|
|
|
AdjustedArgs.push_back("-o");
|
|
|
|
AdjustedArgs.push_back("/dev/null");
|
2019-09-12 08:48:45 +08:00
|
|
|
if (!HasMT && !HasMQ) {
|
2019-09-14 15:25:27 +08:00
|
|
|
AdjustedArgs.push_back("-M");
|
2019-09-12 08:48:45 +08:00
|
|
|
AdjustedArgs.push_back("-MT");
|
|
|
|
// We're interested in source dependencies of an object file.
|
|
|
|
if (!HasMD) {
|
|
|
|
// FIXME: We are missing the directory unless the -o value is an
|
|
|
|
// absolute path.
|
|
|
|
AdjustedArgs.push_back(!LastO.empty() ? LastO
|
|
|
|
: getObjFilePath(FileName));
|
|
|
|
} else {
|
2020-01-29 03:23:46 +08:00
|
|
|
AdjustedArgs.push_back(std::string(FileName));
|
2019-09-12 08:48:45 +08:00
|
|
|
}
|
|
|
|
}
|
2019-06-13 05:32:49 +08:00
|
|
|
AdjustedArgs.push_back("-Xclang");
|
|
|
|
AdjustedArgs.push_back("-Eonly");
|
|
|
|
AdjustedArgs.push_back("-Xclang");
|
|
|
|
AdjustedArgs.push_back("-sys-header-deps");
|
2019-07-04 02:01:32 +08:00
|
|
|
AdjustedArgs.push_back("-Wno-error");
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
|
|
|
|
if (!HasResourceDir) {
|
|
|
|
StringRef ResourceDir =
|
|
|
|
ResourceDirCache.findResourceDir(Args);
|
|
|
|
if (!ResourceDir.empty()) {
|
|
|
|
AdjustedArgs.push_back("-resource-dir");
|
2020-01-29 03:23:46 +08:00
|
|
|
AdjustedArgs.push_back(std::string(ResourceDir));
|
Add support to find out resource dir and add it as compilation args
Summary:
If -resource-dir is not specified as part of the compilation command, then by default
clang-scan-deps picks up a directory relative to its own path as resource-directory.
This is probably not the right behavior - since resource directory should be picked relative
to the path of the clang-compiler in the compilation command.
This patch adds support for it along with a cache to store the resource-dir paths based on
compiler paths.
Notes:
1. "-resource-dir" is a behavior that's specific to clang, gcc does not have that flag. That's why if I'm not able to find a resource-dir, I quietly ignore it.
2. Should I also use the mtime of the compiler in the cache? I think its not strictly necessary since we assume the filesystem is immutable.
3. From my testing, this does not regress performance.
4. Will try to get this tested on Windows.
But basically the problem that this patch is trying to solve is, clients might not always want to specify
"-resource-dir" in their compile commands, so scan-deps must auto-infer it correctly.
Reviewers: arphaman, Bigcheese, jkorous, dexonsmith, klimek
Reviewed By: Bigcheese
Subscribers: MaskRay, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D69122
2019-11-03 12:42:17 +08:00
|
|
|
}
|
|
|
|
}
|
2019-06-13 05:32:49 +08:00
|
|
|
return AdjustedArgs;
|
|
|
|
});
|
2019-09-21 08:17:26 +08:00
|
|
|
AdjustingCompilations->appendArgumentsAdjuster(
|
|
|
|
tooling::getClangStripSerializeDiagnosticAdjuster());
|
2019-06-13 05:32:49 +08:00
|
|
|
|
2019-06-27 05:11:51 +08:00
|
|
|
SharedStream Errs(llvm::errs());
|
2019-06-22 02:24:55 +08:00
|
|
|
// Print out the dependency results to STDOUT by default.
|
|
|
|
SharedStream DependencyOS(llvm::outs());
|
2019-08-07 04:43:25 +08:00
|
|
|
|
2019-10-17 03:28:35 +08:00
|
|
|
DependencyScanningService Service(ScanMode, Format, ReuseFileManager,
|
2019-09-12 04:40:31 +08:00
|
|
|
SkipExcludedPPRanges);
|
[Support] On Windows, ensure hardware_concurrency() extends to all CPU sockets and all NUMA groups
The goal of this patch is to maximize CPU utilization on multi-socket or high core count systems, so that parallel computations such as LLD/ThinLTO can use all hardware threads in the system. Before this patch, on Windows, a maximum of 64 hardware threads could be used at most, in some cases dispatched only on one CPU socket.
== Background ==
Windows doesn't have a flat cpu_set_t like Linux. Instead, it projects hardware CPUs (or NUMA nodes) to applications through a concept of "processor groups". A "processor" is the smallest unit of execution on a CPU, that is, a hyper-thread if SMT is active; a core otherwise. There's a limit of 32 processors on older 32-bit versions of Windows, which was later raised to 64 processors with 64-bit versions of Windows. This limit comes from the affinity mask, which historically is represented by the sizeof(void*). Consequently, the concept of "processor groups" was introduced for dealing with systems with more than 64 hyper-threads.
By default, the Windows OS assigns only one "processor group" to each starting application, in a round-robin manner. If the application wants to use more processors, it needs to programmatically enable it, by assigning threads to other "processor groups". This also means that affinity cannot cross "processor group" boundaries; one can only specify a "preferred" group on start-up, but the application is free to allocate more groups if it wants to.
This creates a peculiar situation, where newer CPUs like the AMD EPYC 7702P (64-cores, 128-hyperthreads) are projected by the OS as two (2) "processor groups". This means that by default, an application can only use half of the cores. This situation could only get worse in the years to come, as dies with more cores will appear on the market.
== The problem ==
The heavyweight_hardware_concurrency() API was introduced so that only *one hardware thread per core* was used. Once that API returns, that original intention is lost, only the number of threads is retained. Consider a situation, on Windows, where the system has 2 CPU sockets, 18 cores each, each core having 2 hyper-threads, for a total of 72 hyper-threads. Both heavyweight_hardware_concurrency() and hardware_concurrency() currently return 36, because on Windows they are simply wrappers over std::thread::hardware_concurrency() -- which can only return processors from the current "processor group".
== The changes in this patch ==
To solve this situation, we capture (and retain) the initial intention until the point of usage, through a new ThreadPoolStrategy class. The number of threads to use is deferred as late as possible, until the moment where the std::threads are created (ThreadPool in the case of ThinLTO).
When using hardware_concurrency(), setting ThreadCount to 0 now means to use all the possible hardware CPU (SMT) threads. Providing a ThreadCount above the maximum number of threads will have no effect; the maximum will be used instead.
The heavyweight_hardware_concurrency() is similar to hardware_concurrency(), except that only one thread per hardware *core* will be used.
When LLVM_ENABLE_THREADS is OFF, the threading APIs will always return 1, to ensure any caller loops will be exercised at least once.
Differential Revision: https://reviews.llvm.org/D71775
2020-02-14 11:49:57 +08:00
|
|
|
llvm::ThreadPool Pool(llvm::hardware_concurrency(NumThreads));
|
2019-06-13 05:32:49 +08:00
|
|
|
std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
|
[Support] On Windows, ensure hardware_concurrency() extends to all CPU sockets and all NUMA groups
The goal of this patch is to maximize CPU utilization on multi-socket or high core count systems, so that parallel computations such as LLD/ThinLTO can use all hardware threads in the system. Before this patch, on Windows, a maximum of 64 hardware threads could be used at most, in some cases dispatched only on one CPU socket.
== Background ==
Windows doesn't have a flat cpu_set_t like Linux. Instead, it projects hardware CPUs (or NUMA nodes) to applications through a concept of "processor groups". A "processor" is the smallest unit of execution on a CPU, that is, a hyper-thread if SMT is active; a core otherwise. There's a limit of 32 processors on older 32-bit versions of Windows, which was later raised to 64 processors with 64-bit versions of Windows. This limit comes from the affinity mask, which historically is represented by the sizeof(void*). Consequently, the concept of "processor groups" was introduced for dealing with systems with more than 64 hyper-threads.
By default, the Windows OS assigns only one "processor group" to each starting application, in a round-robin manner. If the application wants to use more processors, it needs to programmatically enable it, by assigning threads to other "processor groups". This also means that affinity cannot cross "processor group" boundaries; one can only specify a "preferred" group on start-up, but the application is free to allocate more groups if it wants to.
This creates a peculiar situation, where newer CPUs like the AMD EPYC 7702P (64-cores, 128-hyperthreads) are projected by the OS as two (2) "processor groups". This means that by default, an application can only use half of the cores. This situation could only get worse in the years to come, as dies with more cores will appear on the market.
== The problem ==
The heavyweight_hardware_concurrency() API was introduced so that only *one hardware thread per core* was used. Once that API returns, that original intention is lost, only the number of threads is retained. Consider a situation, on Windows, where the system has 2 CPU sockets, 18 cores each, each core having 2 hyper-threads, for a total of 72 hyper-threads. Both heavyweight_hardware_concurrency() and hardware_concurrency() currently return 36, because on Windows they are simply wrappers over std::thread::hardware_concurrency() -- which can only return processors from the current "processor group".
== The changes in this patch ==
To solve this situation, we capture (and retain) the initial intention until the point of usage, through a new ThreadPoolStrategy class. The number of threads to use is deferred as late as possible, until the moment where the std::threads are created (ThreadPool in the case of ThinLTO).
When using hardware_concurrency(), setting ThreadCount to 0 now means to use all the possible hardware CPU (SMT) threads. Providing a ThreadCount above the maximum number of threads will have no effect; the maximum will be used instead.
The heavyweight_hardware_concurrency() is similar to hardware_concurrency(), except that only one thread per hardware *core* will be used.
When LLVM_ENABLE_THREADS is OFF, the threading APIs will always return 1, to ensure any caller loops will be exercised at least once.
Differential Revision: https://reviews.llvm.org/D71775
2020-02-14 11:49:57 +08:00
|
|
|
for (unsigned I = 0; I < Pool.getThreadCount(); ++I)
|
2019-10-31 05:04:11 +08:00
|
|
|
WorkerTools.push_back(std::make_unique<DependencyScanningTool>(Service));
|
|
|
|
|
|
|
|
std::vector<SingleCommandCompilationDatabase> Inputs;
|
|
|
|
for (tooling::CompileCommand Cmd :
|
|
|
|
AdjustingCompilations->getAllCompileCommands())
|
|
|
|
Inputs.emplace_back(Cmd);
|
2019-06-13 05:32:49 +08:00
|
|
|
|
|
|
|
std::atomic<bool> HadErrors(false);
|
2019-10-29 05:26:45 +08:00
|
|
|
FullDeps FD;
|
2019-06-13 05:32:49 +08:00
|
|
|
std::mutex Lock;
|
|
|
|
size_t Index = 0;
|
|
|
|
|
2019-09-18 03:45:24 +08:00
|
|
|
if (Verbose) {
|
|
|
|
llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
|
[Support] On Windows, ensure hardware_concurrency() extends to all CPU sockets and all NUMA groups
The goal of this patch is to maximize CPU utilization on multi-socket or high core count systems, so that parallel computations such as LLD/ThinLTO can use all hardware threads in the system. Before this patch, on Windows, a maximum of 64 hardware threads could be used at most, in some cases dispatched only on one CPU socket.
== Background ==
Windows doesn't have a flat cpu_set_t like Linux. Instead, it projects hardware CPUs (or NUMA nodes) to applications through a concept of "processor groups". A "processor" is the smallest unit of execution on a CPU, that is, a hyper-thread if SMT is active; a core otherwise. There's a limit of 32 processors on older 32-bit versions of Windows, which was later raised to 64 processors with 64-bit versions of Windows. This limit comes from the affinity mask, which historically is represented by the sizeof(void*). Consequently, the concept of "processor groups" was introduced for dealing with systems with more than 64 hyper-threads.
By default, the Windows OS assigns only one "processor group" to each starting application, in a round-robin manner. If the application wants to use more processors, it needs to programmatically enable it, by assigning threads to other "processor groups". This also means that affinity cannot cross "processor group" boundaries; one can only specify a "preferred" group on start-up, but the application is free to allocate more groups if it wants to.
This creates a peculiar situation, where newer CPUs like the AMD EPYC 7702P (64-cores, 128-hyperthreads) are projected by the OS as two (2) "processor groups". This means that by default, an application can only use half of the cores. This situation could only get worse in the years to come, as dies with more cores will appear on the market.
== The problem ==
The heavyweight_hardware_concurrency() API was introduced so that only *one hardware thread per core* was used. Once that API returns, that original intention is lost, only the number of threads is retained. Consider a situation, on Windows, where the system has 2 CPU sockets, 18 cores each, each core having 2 hyper-threads, for a total of 72 hyper-threads. Both heavyweight_hardware_concurrency() and hardware_concurrency() currently return 36, because on Windows they are simply wrappers over std::thread::hardware_concurrency() -- which can only return processors from the current "processor group".
== The changes in this patch ==
To solve this situation, we capture (and retain) the initial intention until the point of usage, through a new ThreadPoolStrategy class. The number of threads to use is deferred as late as possible, until the moment where the std::threads are created (ThreadPool in the case of ThinLTO).
When using hardware_concurrency(), setting ThreadCount to 0 now means to use all the possible hardware CPU (SMT) threads. Providing a ThreadCount above the maximum number of threads will have no effect; the maximum will be used instead.
The heavyweight_hardware_concurrency() is similar to hardware_concurrency(), except that only one thread per hardware *core* will be used.
When LLVM_ENABLE_THREADS is OFF, the threading APIs will always return 1, to ensure any caller loops will be exercised at least once.
Differential Revision: https://reviews.llvm.org/D71775
2020-02-14 11:49:57 +08:00
|
|
|
<< " files using " << Pool.getThreadCount() << " workers\n";
|
2019-09-18 03:45:24 +08:00
|
|
|
}
|
[Support] On Windows, ensure hardware_concurrency() extends to all CPU sockets and all NUMA groups
The goal of this patch is to maximize CPU utilization on multi-socket or high core count systems, so that parallel computations such as LLD/ThinLTO can use all hardware threads in the system. Before this patch, on Windows, a maximum of 64 hardware threads could be used at most, in some cases dispatched only on one CPU socket.
== Background ==
Windows doesn't have a flat cpu_set_t like Linux. Instead, it projects hardware CPUs (or NUMA nodes) to applications through a concept of "processor groups". A "processor" is the smallest unit of execution on a CPU, that is, a hyper-thread if SMT is active; a core otherwise. There's a limit of 32 processors on older 32-bit versions of Windows, which was later raised to 64 processors with 64-bit versions of Windows. This limit comes from the affinity mask, which historically is represented by the sizeof(void*). Consequently, the concept of "processor groups" was introduced for dealing with systems with more than 64 hyper-threads.
By default, the Windows OS assigns only one "processor group" to each starting application, in a round-robin manner. If the application wants to use more processors, it needs to programmatically enable it, by assigning threads to other "processor groups". This also means that affinity cannot cross "processor group" boundaries; one can only specify a "preferred" group on start-up, but the application is free to allocate more groups if it wants to.
This creates a peculiar situation, where newer CPUs like the AMD EPYC 7702P (64-cores, 128-hyperthreads) are projected by the OS as two (2) "processor groups". This means that by default, an application can only use half of the cores. This situation could only get worse in the years to come, as dies with more cores will appear on the market.
== The problem ==
The heavyweight_hardware_concurrency() API was introduced so that only *one hardware thread per core* was used. Once that API returns, that original intention is lost, only the number of threads is retained. Consider a situation, on Windows, where the system has 2 CPU sockets, 18 cores each, each core having 2 hyper-threads, for a total of 72 hyper-threads. Both heavyweight_hardware_concurrency() and hardware_concurrency() currently return 36, because on Windows they are simply wrappers over std::thread::hardware_concurrency() -- which can only return processors from the current "processor group".
== The changes in this patch ==
To solve this situation, we capture (and retain) the initial intention until the point of usage, through a new ThreadPoolStrategy class. The number of threads to use is deferred as late as possible, until the moment where the std::threads are created (ThreadPool in the case of ThinLTO).
When using hardware_concurrency(), setting ThreadCount to 0 now means to use all the possible hardware CPU (SMT) threads. Providing a ThreadCount above the maximum number of threads will have no effect; the maximum will be used instead.
The heavyweight_hardware_concurrency() is similar to hardware_concurrency(), except that only one thread per hardware *core* will be used.
When LLVM_ENABLE_THREADS is OFF, the threading APIs will always return 1, to ensure any caller loops will be exercised at least once.
Differential Revision: https://reviews.llvm.org/D71775
2020-02-14 11:49:57 +08:00
|
|
|
for (unsigned I = 0; I < Pool.getThreadCount(); ++I) {
|
2020-02-14 05:53:25 +08:00
|
|
|
Pool.async([I, &Lock, &Index, &Inputs, &HadErrors, &FD, &WorkerTools,
|
|
|
|
&DependencyOS, &Errs]() {
|
2019-10-29 05:26:45 +08:00
|
|
|
llvm::StringSet<> AlreadySeenModules;
|
2019-08-13 08:36:35 +08:00
|
|
|
while (true) {
|
2019-10-31 05:04:11 +08:00
|
|
|
const SingleCommandCompilationDatabase *Input;
|
|
|
|
std::string Filename;
|
|
|
|
std::string CWD;
|
2019-10-29 05:26:45 +08:00
|
|
|
size_t LocalIndex;
|
2019-08-13 08:36:35 +08:00
|
|
|
// Take the next input.
|
|
|
|
{
|
|
|
|
std::unique_lock<std::mutex> LockGuard(Lock);
|
|
|
|
if (Index >= Inputs.size())
|
|
|
|
return;
|
2019-10-29 05:26:45 +08:00
|
|
|
LocalIndex = Index;
|
2019-10-31 05:04:11 +08:00
|
|
|
Input = &Inputs[Index++];
|
|
|
|
tooling::CompileCommand Cmd = Input->getAllCompileCommands()[0];
|
|
|
|
Filename = std::move(Cmd.Filename);
|
|
|
|
CWD = std::move(Cmd.Directory);
|
2019-08-13 08:36:35 +08:00
|
|
|
}
|
|
|
|
// Run the tool on it.
|
2019-10-29 05:26:45 +08:00
|
|
|
if (Format == ScanningOutputFormat::Make) {
|
|
|
|
auto MaybeFile = WorkerTools[I]->getDependencyFile(*Input, CWD);
|
|
|
|
if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
|
|
|
|
Errs))
|
|
|
|
HadErrors = true;
|
|
|
|
} else {
|
|
|
|
auto MaybeFullDeps = WorkerTools[I]->getFullDependencies(
|
|
|
|
*Input, CWD, AlreadySeenModules);
|
|
|
|
if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD,
|
|
|
|
LocalIndex, DependencyOS, Errs))
|
|
|
|
HadErrors = true;
|
|
|
|
}
|
2019-08-13 08:36:35 +08:00
|
|
|
}
|
2020-02-14 05:53:25 +08:00
|
|
|
});
|
2019-06-13 05:32:49 +08:00
|
|
|
}
|
2020-02-14 05:53:25 +08:00
|
|
|
Pool.wait();
|
2019-06-13 05:32:49 +08:00
|
|
|
|
2019-10-29 05:26:45 +08:00
|
|
|
if (Format == ScanningOutputFormat::Full)
|
|
|
|
FD.printFullOutput(llvm::outs());
|
|
|
|
|
2019-06-13 05:32:49 +08:00
|
|
|
return HadErrors;
|
|
|
|
}
|