forked from OSchip/llvm-project
[clang][deps] Include canonical invocation in ContextHash
The "strict context hash" is insufficient to identify module dependencies during scanning, leading to different module build commands being produced for a single module, and non-deterministically choosing between them. This commit switches to hashing the canonicalized `CompilerInvocation` of the module. By hashing the invocation we are converting these from correctness issues to performance issues, and we can then incrementally improve our ability to canonicalize command-lines. This change can cause a regression in the number of modules needed. Of the 4 projects I tested, 3 had no regression, but 1, which was clang+llvm itself, had a 66% regression in number of modules (4% regression in total invocations). This is almost entirely due to differences between -W options across targets. Of this, 25% of the additional modules are system modules, which we could avoid if we canonicalized -W options when -Wsystem-headers is not present -- unfortunately this is non-trivial due to some warnings being enabled in system headers by default. The rest of the additional modules are mostly real differences in potential warnings, reflecting incorrect behaviour in the current scanner. There were also a couple of differences due to `-DFOO` `-fmodule-ignore-macro=FOO`, which I fixed here. Since the output paths for the module depend on its context hash, we hash the invocation before filling in outputs, and rely on the build system to always return the same output paths for a given module. Note: since the scanner itself uses an implicit modules build, there can still be non-determinism, but it will now present as different module+hashes rather than different command-lines for the same module+hash. Differential Revision: https://reviews.llvm.org/D129884
This commit is contained in:
parent
66eabeb65d
commit
0287170140
|
@ -46,9 +46,11 @@ struct ModuleID {
|
|||
/// or a header-name for C++20 header units.
|
||||
std::string ModuleName;
|
||||
|
||||
/// The context hash of a module represents the set of compiler options that
|
||||
/// may make one version of a module incompatible with another. This includes
|
||||
/// things like language mode, predefined macros, header search paths, etc...
|
||||
/// The context hash of a module represents the compiler options that affect
|
||||
/// the resulting command-line invocation.
|
||||
///
|
||||
/// Modules with the same name and ContextHash but different invocations could
|
||||
/// cause non-deterministic build results.
|
||||
///
|
||||
/// Modules with the same name but a different \c ContextHash should be
|
||||
/// treated as separate modules for the purpose of a build.
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
|
||||
#include "llvm/Support/BLAKE3.h"
|
||||
#include "llvm/Support/StringSaver.h"
|
||||
|
||||
using namespace clang;
|
||||
|
@ -77,6 +78,19 @@ CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
|
|||
CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
|
||||
CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
|
||||
|
||||
// Remove any macro definitions that are explicitly ignored.
|
||||
if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
|
||||
llvm::erase_if(
|
||||
CI.getPreprocessorOpts().Macros,
|
||||
[&CI](const std::pair<std::string, bool> &Def) {
|
||||
StringRef MacroDef = Def.first;
|
||||
return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
|
||||
llvm::CachedHashString(MacroDef.split('=').first));
|
||||
});
|
||||
// Remove the now unused option.
|
||||
CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
|
||||
}
|
||||
|
||||
// Report the prebuilt modules this module uses.
|
||||
for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
|
||||
CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
|
||||
|
@ -156,6 +170,50 @@ std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
|
|||
return serializeCompilerInvocation(CI);
|
||||
}
|
||||
|
||||
static std::string getModuleContextHash(const ModuleDeps &MD) {
|
||||
llvm::HashBuilder<llvm::TruncatedBLAKE3<16>,
|
||||
llvm::support::endianness::native>
|
||||
HashBuilder;
|
||||
SmallString<32> Scratch;
|
||||
|
||||
// Hash the compiler version and serialization version to ensure the module
|
||||
// will be readable.
|
||||
HashBuilder.add(getClangFullRepositoryVersion());
|
||||
HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
|
||||
|
||||
// Hash the BuildInvocation without any input files.
|
||||
SmallVector<const char *, 32> DummyArgs;
|
||||
MD.BuildInvocation.generateCC1CommandLine(DummyArgs, [&](const Twine &Arg) {
|
||||
Scratch.clear();
|
||||
StringRef Str = Arg.toStringRef(Scratch);
|
||||
HashBuilder.add(Str);
|
||||
return "<unused>";
|
||||
});
|
||||
|
||||
// Hash the input file paths and module dependencies. These paths may differ
|
||||
// even if the invocation is identical if they depend on the contents of the
|
||||
// files in the TU -- for example, case-insensitive paths to modulemap files.
|
||||
// Usually such a case would indicate a missed optimization to canonicalize,
|
||||
// but it may be difficult to canonicalize all cases when there is a VFS.
|
||||
HashBuilder.add(MD.ClangModuleMapFile);
|
||||
for (const auto &Dep : MD.PrebuiltModuleDeps)
|
||||
HashBuilder.add(Dep.PCMFile);
|
||||
for (const auto &ID : MD.ClangModuleDeps) {
|
||||
HashBuilder.add(ID.ModuleName);
|
||||
HashBuilder.add(ID.ContextHash);
|
||||
}
|
||||
|
||||
// Hash options that affect which callbacks are made for outputs.
|
||||
HashBuilder.add(MD.HadDependencyFile);
|
||||
HashBuilder.add(MD.HadSerializedDiagnostics);
|
||||
|
||||
llvm::BLAKE3Result<16> Hash = HashBuilder.final();
|
||||
std::array<uint64_t, 2> Words;
|
||||
static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
|
||||
std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
|
||||
return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
|
||||
return serializeCompilerInvocation(BuildInvocation);
|
||||
|
@ -346,13 +404,12 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
|
|||
.DiagnosticSerializationFile.empty();
|
||||
MD.HadDependencyFile =
|
||||
!MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
|
||||
// FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
|
||||
// the context hash since it can affect the command-line.
|
||||
MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
|
||||
|
||||
llvm::DenseSet<const Module *> AddedModules;
|
||||
addAllSubmoduleDeps(M, MD, AddedModules);
|
||||
|
||||
// Do this last since it requires the dependencies.
|
||||
MD.ID.ContextHash = getModuleContextHash(MD);
|
||||
return MD.ID;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
// Ensure '-DFOO -fmodules-ignore-macro=FOO' and '' both produce the same
|
||||
// canonical module build command.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
|
||||
|
||||
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
|
||||
// RUN: -format experimental-full > %t/deps.json
|
||||
// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
|
||||
|
||||
// CHECK: {
|
||||
// CHECK-NEXT: "modules": [
|
||||
// CHECK: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK-NOT: "FOO"
|
||||
// CHECK-NOT: "-fmodules-ignore-macro
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH_NO_FOO:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK: "-D"
|
||||
// CHECK-NEXT: "FOO"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH_FOO:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "translation-units": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK-NOT: "-DFOO"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu1.c"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH_FOO]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK: "-DFOO"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu2.c"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK: "-DFOO"
|
||||
// CHECK: "-fmodules-ignore-macro=FOO"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu3.c"
|
||||
|
||||
//--- cdb.json.template
|
||||
[
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu1.c"
|
||||
},
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -DFOO -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu2.c"
|
||||
},
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -DFOO -fmodules-ignore-macro=FOO -fsyntax-only DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu3.c"
|
||||
},
|
||||
]
|
||||
|
||||
//--- module.modulemap
|
||||
module Mod { header "Mod.h" }
|
||||
|
||||
//--- Mod.h
|
||||
|
||||
//--- tu1.c
|
||||
#include "Mod.h"
|
||||
|
||||
//--- tu2.c
|
||||
#include "Mod.h"
|
||||
|
||||
//--- tu3.c
|
||||
#include "Mod.h"
|
|
@ -0,0 +1,77 @@
|
|||
// Ensure the path to the modulemap input is included in the module context hash
|
||||
// irrespective of other TU command-line arguments, as it effects the canonical
|
||||
// module build command. In this test we use the difference in spelling between
|
||||
// module.modulemap and module.map, but it also applies to situations such as
|
||||
// differences in case-insensitive paths if they are not canonicalized away.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
|
||||
|
||||
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
|
||||
// RUN: -format experimental-full > %t/deps.json
|
||||
|
||||
// RUN: mv %t/module.modulemap %t/module.map
|
||||
// RUN: echo 'AFTER_MOVE' >> %t/deps.json
|
||||
|
||||
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
|
||||
// RUN: -format experimental-full >> %t/deps.json
|
||||
|
||||
// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
|
||||
|
||||
// CHECK: {
|
||||
// CHECK-NEXT: "modules": [
|
||||
// CHECK: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK: "{{.*}}module.modulemap"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH1:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "translation-units": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH1]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-LABEL: AFTER_MOVE
|
||||
// CHECK: {
|
||||
// CHECK-NEXT: "modules": [
|
||||
// CHECK: {
|
||||
// CHECK-NOT: [[HASH1]]
|
||||
// CHECK: "command-line": [
|
||||
// CHECK: "{{.*}}module.map"
|
||||
// CHECK: ]
|
||||
// CHECK-NOT: [[HASH1]]
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "translation-units": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash":
|
||||
// CHECK-NOT: [[HASH1]]
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
|
||||
//--- cdb.json.template
|
||||
[
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu.c"
|
||||
}
|
||||
]
|
||||
|
||||
//--- module.modulemap
|
||||
module Mod { header "Mod.h" }
|
||||
|
||||
//--- Mod.h
|
||||
|
||||
//--- tu.c
|
||||
#include "Mod.h"
|
|
@ -0,0 +1,77 @@
|
|||
// If secondary output files such as .d are enabled, ensure it affects the
|
||||
// module context hash since it may impact the resulting module build commands.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
|
||||
|
||||
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
|
||||
// RUN: -format experimental-full > %t/deps.json
|
||||
// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
|
||||
|
||||
// CHECK: {
|
||||
// CHECK-NEXT: "modules": [
|
||||
// CHECK: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK: "-dependency-file"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH1:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK-NOT: "-dependency-file"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH2:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "translation-units": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH1]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK: "-MF"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu1.c"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH2]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK-NOT: "-MF"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu2.c"
|
||||
|
||||
//--- cdb.json.template
|
||||
[
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -MD -MF DIR/tu1.d -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu1.c"
|
||||
},
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu2.c"
|
||||
},
|
||||
]
|
||||
|
||||
//--- module.modulemap
|
||||
module Mod { header "Mod.h" }
|
||||
|
||||
//--- Mod.h
|
||||
|
||||
//--- tu1.c
|
||||
#include "Mod.h"
|
||||
|
||||
//--- tu2.c
|
||||
#include "Mod.h"
|
|
@ -0,0 +1,77 @@
|
|||
// Differences in -W warning options should result in different canonical module
|
||||
// build commands.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
|
||||
|
||||
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
|
||||
// RUN: -format experimental-full > %t/deps.json
|
||||
// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
|
||||
|
||||
// CHECK: {
|
||||
// CHECK-NEXT: "modules": [
|
||||
// CHECK: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK: "-Wall"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH1:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "command-line": [
|
||||
// CHECK-NOT: "-Wall"
|
||||
// CHECK: ]
|
||||
// CHECK: "context-hash": "[[HASH2:.*]]"
|
||||
// CHECK: "name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "translation-units": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH1]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK: "-Wall"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu1.c"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: {
|
||||
// CHECK: "clang-module-deps": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "context-hash": "[[HASH2]]"
|
||||
// CHECK-NEXT: "module-name": "Mod"
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: "command-line": [
|
||||
// CHECK-NOT: "-Wall"
|
||||
// CHECK: ]
|
||||
// CHECK: "input-file": "{{.*}}tu2.c"
|
||||
|
||||
//--- cdb.json.template
|
||||
[
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -Wall -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu1.c"
|
||||
},
|
||||
{
|
||||
"directory": "DIR",
|
||||
"command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
|
||||
"file": "DIR/tu2.c"
|
||||
},
|
||||
]
|
||||
|
||||
//--- module.modulemap
|
||||
module Mod { header "Mod.h" }
|
||||
|
||||
//--- Mod.h
|
||||
|
||||
//--- tu1.c
|
||||
#include "Mod.h"
|
||||
|
||||
//--- tu2.c
|
||||
#include "Mod.h"
|
Loading…
Reference in New Issue