[Frontend] Only compile modules if not already finalized

It was possible to re-add a module to a shared in-memory module cache
when search paths are changed. This can eventually cause a crash if the
original module is referenced after this occurs.
  1. Module A depends on B
  2. B exists in two paths C and D
  3. First run only has C on the search path, finds A and B and loads
     them
  4. Second run adds D to the front of the search path. A is loaded and
     contains a reference to the already compiled module from C. But
     searching finds the module from D instead, causing a mismatch
  5. B and the modules that depend on it are considered out of date and
     thus rebuilt
  6. The recompiled module A is added to the in-memory cache, freeing
     the previously inserted one

This can never occur from a regular clang process, but is very easy to
do through the API - whether through the use of a shared case or just
running multiple compilations from a single `CompilerInstance`. Update
the compilation to return early if a module is already finalized so that
the pre-condition in the in-memory module cache holds.

Resolves rdar://78180255

Differential Revision: https://reviews.llvm.org/D105328
This commit is contained in:
Ben Barham 2021-07-15 18:24:09 -07:00 committed by Volodymyr Sapsai
parent 3c4023b225
commit 766a08df12
7 changed files with 220 additions and 11 deletions

View File

@ -111,6 +111,8 @@ def err_module_cycle : Error<"cyclic dependency in module '%0': %1">,
DefaultFatal;
def err_module_prebuilt : Error<
"error in loading module '%0' from prebuilt module path">, DefaultFatal;
def err_module_rebuild_finalized : Error<
"cannot rebuild module '%0' as it is already finalized">, DefaultFatal;
def note_pragma_entered_here : Note<"#pragma entered here">;
def note_decl_hiding_tag_type : Note<
"%1 %0 is hidden by a non-type declaration of %0 here">;

View File

@ -69,6 +69,9 @@ def err_module_file_not_module : Error<
"AST file '%0' was not built as a module">, DefaultFatal;
def err_module_file_missing_top_level_submodule : Error<
"module file '%0' is missing its top-level submodule">, DefaultFatal;
def note_module_file_conflict : Note<
"this is generally caused by modules with the same name found in multiple "
"paths">;
def remark_module_import : Remark<
"importing module '%0'%select{| into '%3'}2 from '%1'">,

View File

@ -1401,6 +1401,9 @@ private:
llvm::iterator_range<PreprocessingRecord::iterator>
getModulePreprocessedEntities(ModuleFile &Mod) const;
bool canRecoverFromOutOfDate(StringRef ModuleFileName,
unsigned ClientLoadCapabilities);
public:
class ModuleDeclIterator
: public llvm::iterator_adaptor_base<

View File

@ -1054,6 +1054,15 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
[](CompilerInstance &) {}) {
llvm::TimeTraceScope TimeScope("Module Compile", ModuleName);
// Never compile a module that's already finalized - this would cause the
// existing module to be freed, causing crashes if it is later referenced
if (ImportingInstance.getModuleCache().isPCMFinal(ModuleFileName)) {
ImportingInstance.getDiagnostics().Report(
ImportLoc, diag::err_module_rebuild_finalized)
<< ModuleName;
return false;
}
// Construct a compiler invocation for creating this module.
auto Invocation =
std::make_shared<CompilerInvocation>(ImportingInstance.getInvocation());

View File

@ -2764,7 +2764,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
// If requested by the caller and the module hasn't already been read
// or compiled, mark modules on error as out-of-date.
if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) &&
!ModuleMgr.getModuleCache().isPCMFinal(F.FileName))
canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
return OutOfDate;
if (!AllowASTWithCompilerErrors) {
@ -2850,9 +2850,14 @@ ASTReader::ReadControlBlock(ModuleFile &F,
StoredSignature, Capabilities);
// If we diagnosed a problem, produce a backtrace.
if (isDiagnosedResult(Result, Capabilities))
bool recompilingFinalized =
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
Diag(diag::note_module_file_imported_by)
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
if (recompilingFinalized)
Diag(diag::note_module_file_conflict);
switch (Result) {
case Failure: return Failure;
@ -2918,7 +2923,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
F.Kind != MK_ExplicitModule && F.Kind != MK_PrebuiltModule) {
auto BuildDir = PP.getFileManager().getDirectory(Blob);
if (!BuildDir || *BuildDir != M->Directory) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_relocated)
<< F.ModuleName << Blob << M->Directory->getName();
return OutOfDate;
@ -3928,7 +3933,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
!ModMap) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) {
if (auto ASTFE = M ? M->getASTFile() : None) {
// This module was defined by an imported (explicit) module.
Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName
@ -3959,7 +3964,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
assert((ImportedBy || F.Kind == MK_ImplicitModule) &&
"top-level import should be verified");
bool NotImported = F.Kind == MK_ImplicitModule && !ImportedBy;
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_modmap_changed)
<< F.ModuleName << (NotImported ? F.FileName : ImportedBy->FileName)
<< ModMap->getName() << F.ModuleMapPath << NotImported;
@ -3970,13 +3975,13 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
for (unsigned I = 0, N = Record[Idx++]; I < N; ++I) {
// FIXME: we should use input files rather than storing names.
std::string Filename = ReadPath(F, Record, Idx);
auto F = FileMgr.getFile(Filename, false, false);
if (!F) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
auto SF = FileMgr.getFile(Filename, false, false);
if (!SF) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Error("could not find file '" + Filename +"' referenced by AST file");
return OutOfDate;
}
AdditionalStoredMaps.insert(*F);
AdditionalStoredMaps.insert(*SF);
}
// Check any additional module map files (e.g. module.private.modulemap)
@ -3986,7 +3991,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
// Remove files that match
// Note: SmallPtrSet::erase is really remove
if (!AdditionalStoredMaps.erase(ModMap)) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*new*/0 << ModMap->getName();
return OutOfDate;
@ -3997,7 +4002,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
// Check any additional module map files that are in the pcm, but not
// found in header search. Cases that match are already removed.
for (const FileEntry *ModMap : AdditionalStoredMaps) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*not new*/1 << ModMap->getName();
return OutOfDate;
@ -5924,6 +5929,12 @@ ASTReader::getModulePreprocessedEntities(ModuleFile &Mod) const {
PreprocessingRecord::iterator());
}
bool ASTReader::canRecoverFromOutOfDate(StringRef ModuleFileName,
unsigned int ClientLoadCapabilities) {
return ClientLoadCapabilities & ARR_OutOfDate &&
!getModuleManager().getModuleCache().isPCMFinal(ModuleFileName);
}
llvm::iterator_range<ASTReader::ModuleDeclIterator>
ASTReader::getModuleFileLevelDecls(ModuleFile &Mod) {
return llvm::make_range(

View File

@ -6,12 +6,14 @@ set(LLVM_LINK_COMPONENTS
add_clang_unittest(SerializationTests
InMemoryModuleCacheTest.cpp
ModuleCacheTest.cpp
)
clang_target_link_libraries(SerializationTests
PRIVATE
clangAST
clangBasic
clangFrontend
clangLex
clangSema
clangSerialization

View File

@ -0,0 +1,179 @@
//===- unittests/Serialization/ModuleCacheTest.cpp - CI tests -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/FileManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/HeaderSearch.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace clang;
namespace {
class ModuleCacheTest : public ::testing::Test {
void SetUp() override {
ASSERT_FALSE(sys::fs::createUniqueDirectory("modulecache-test", TestDir));
ModuleCachePath = SmallString<256>(TestDir);
sys::path::append(ModuleCachePath, "mcp");
ASSERT_FALSE(sys::fs::create_directories(ModuleCachePath));
}
void TearDown() override { sys::fs::remove_directories(TestDir); }
public:
SmallString<256> TestDir;
SmallString<256> ModuleCachePath;
void addFile(StringRef Path, StringRef Contents) {
ASSERT_FALSE(sys::path::is_absolute(Path));
SmallString<256> AbsPath(TestDir);
sys::path::append(AbsPath, Path);
std::error_code EC;
ASSERT_FALSE(
sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath)));
llvm::raw_fd_ostream OS(AbsPath, EC);
ASSERT_FALSE(EC);
OS << Contents;
}
void addDuplicateFrameworks() {
addFile("test.m", R"cpp(
@import Top;
)cpp");
addFile("frameworks/Top.framework/Headers/top.h", R"cpp(
@import M;
)cpp");
addFile("frameworks/Top.framework/Modules/module.modulemap", R"cpp(
framework module Top [system] {
header "top.h"
export *
}
)cpp");
addFile("frameworks/M.framework/Headers/m.h", R"cpp(
void foo();
)cpp");
addFile("frameworks/M.framework/Modules/module.modulemap", R"cpp(
framework module M [system] {
header "m.h"
export *
}
)cpp");
addFile("frameworks2/M.framework/Headers/m.h", R"cpp(
void foo();
)cpp");
addFile("frameworks2/M.framework/Modules/module.modulemap", R"cpp(
framework module M [system] {
header "m.h"
export *
}
)cpp");
}
};
TEST_F(ModuleCacheTest, CachedModuleNewPath) {
addDuplicateFrameworks();
SmallString<256> MCPArg("-fmodules-cache-path=");
MCPArg.append(ModuleCachePath);
IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
CompilerInstance::createDiagnostics(new DiagnosticOptions());
// First run should pass with no errors
const char *Args[] = {"clang", "-fmodules", "-Fframeworks",
MCPArg.c_str(), "-working-directory", TestDir.c_str(),
"test.m"};
std::shared_ptr<CompilerInvocation> Invocation =
createInvocationFromCommandLine(Args, Diags);
ASSERT_TRUE(Invocation);
CompilerInstance Instance;
Instance.setDiagnostics(Diags.get());
Instance.setInvocation(Invocation);
SyntaxOnlyAction Action;
ASSERT_TRUE(Instance.ExecuteAction(Action));
ASSERT_FALSE(Diags->hasErrorOccurred());
// Now add `frameworks2` to the search path. `Top.pcm` will have a reference
// to the `M` from `frameworks`, but a search will find the `M` from
// `frameworks2` - causing a mismatch and it to be considered out of date.
//
// Normally this would be fine - `M` and the modules it depends on would be
// rebuilt. However, since we have a shared module cache and thus an already
// finalized `Top`, recompiling `Top` will cause the existing module to be
// removed from the cache, causing possible crashed if it is ever used.
//
// Make sure that an error occurs instead.
const char *Args2[] = {"clang", "-fmodules", "-Fframeworks2",
"-Fframeworks", MCPArg.c_str(), "-working-directory",
TestDir.c_str(), "test.m"};
std::shared_ptr<CompilerInvocation> Invocation2 =
createInvocationFromCommandLine(Args2, Diags);
ASSERT_TRUE(Invocation2);
CompilerInstance Instance2(Instance.getPCHContainerOperations(),
&Instance.getModuleCache());
Instance2.setDiagnostics(Diags.get());
Instance2.setInvocation(Invocation2);
SyntaxOnlyAction Action2;
ASSERT_FALSE(Instance2.ExecuteAction(Action2));
ASSERT_TRUE(Diags->hasErrorOccurred());
}
TEST_F(ModuleCacheTest, CachedModuleNewPathAllowErrors) {
addDuplicateFrameworks();
SmallString<256> MCPArg("-fmodules-cache-path=");
MCPArg.append(ModuleCachePath);
IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
CompilerInstance::createDiagnostics(new DiagnosticOptions());
// First run should pass with no errors
const char *Args[] = {"clang", "-fmodules", "-Fframeworks",
MCPArg.c_str(), "-working-directory", TestDir.c_str(),
"test.m"};
std::shared_ptr<CompilerInvocation> Invocation =
createInvocationFromCommandLine(Args, Diags);
ASSERT_TRUE(Invocation);
CompilerInstance Instance;
Instance.setDiagnostics(Diags.get());
Instance.setInvocation(Invocation);
SyntaxOnlyAction Action;
ASSERT_TRUE(Instance.ExecuteAction(Action));
ASSERT_FALSE(Diags->hasErrorOccurred());
// Same as `CachedModuleNewPath` but while allowing errors. This is a hard
// failure where the module wasn't created, so it should still fail.
const char *Args2[] = {
"clang", "-fmodules", "-Fframeworks2",
"-Fframeworks", MCPArg.c_str(), "-working-directory",
TestDir.c_str(), "-Xclang", "-fallow-pcm-with-compiler-errors",
"test.m"};
std::shared_ptr<CompilerInvocation> Invocation2 =
createInvocationFromCommandLine(Args2, Diags);
ASSERT_TRUE(Invocation2);
CompilerInstance Instance2(Instance.getPCHContainerOperations(),
&Instance.getModuleCache());
Instance2.setDiagnostics(Diags.get());
Instance2.setInvocation(Invocation2);
SyntaxOnlyAction Action2;
ASSERT_FALSE(Instance2.ExecuteAction(Action2));
ASSERT_TRUE(Diags->hasErrorOccurred());
}
} // anonymous namespace