[C++20][Modules][1/8] Track valid import state.

In C++20 modules imports must be together and at the start of the module.
Rather than growing more ad-hoc flags to test state, this keeps track of the
phase of of a valid module TU (first decl, global module frag, module,
private module frag).  If the phasing is broken (with some diagnostic) the
pattern does not conform to a valid C++20 module, and we set the state
accordingly.

We can thus issue diagnostics when imports appear in the wrong places and
decouple the C++20 modules state from other module variants (modules-ts and
clang modules).  Additionally, we attempt to diagnose wrong imports before
trying to find the module where possible (the latter will generally emit an
unhelpful diagnostic about the module not being available).

Although this generally simplifies the handling of C++20 module import
diagnostics, the motivation was that, in particular, it allows detecting
invalid imports like:

import module A;

int some_decl();

import module B;

where being in a module purview is insufficient to identify them.

Differential Revision: https://reviews.llvm.org/D118893
This commit is contained in:
Iain Sandoe 2021-02-11 00:15:56 +00:00
parent 2a46450849
commit 8a3f9a584a
9 changed files with 286 additions and 37 deletions

View File

@ -1539,6 +1539,10 @@ def err_private_module_fragment_expected_semi : Error<
def err_missing_before_module_end : Error<"expected %0 at end of module">;
def err_unsupported_module_partition : Error<
"sorry, module partitions are not yet supported">;
def err_import_not_allowed_here : Error<
"imports must immediately follow the module declaration">;
def err_import_in_wrong_fragment : Error<
"module%select{| partition}0 imports cannot be in the %select{global|private}1 module fragment">;
def err_export_empty : Error<"export declaration cannot be empty">;
}

View File

@ -464,14 +464,17 @@ public:
void Initialize();
/// Parse the first top-level declaration in a translation unit.
bool ParseFirstTopLevelDecl(DeclGroupPtrTy &Result);
bool ParseFirstTopLevelDecl(DeclGroupPtrTy &Result,
Sema::ModuleImportState &ImportState);
/// ParseTopLevelDecl - Parse one top-level declaration. Returns true if
/// the EOF was encountered.
bool ParseTopLevelDecl(DeclGroupPtrTy &Result, bool IsFirstDecl = false);
bool ParseTopLevelDecl(DeclGroupPtrTy &Result,
Sema::ModuleImportState &ImportState);
bool ParseTopLevelDecl() {
DeclGroupPtrTy Result;
return ParseTopLevelDecl(Result);
Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
return ParseTopLevelDecl(Result, IS);
}
/// ConsumeToken - Consume the current 'peek token' and lex the next one.
@ -3491,8 +3494,9 @@ private:
//===--------------------------------------------------------------------===//
// Modules
DeclGroupPtrTy ParseModuleDecl(bool IsFirstDecl);
Decl *ParseModuleImport(SourceLocation AtLoc);
DeclGroupPtrTy ParseModuleDecl(Sema::ModuleImportState &ImportState);
Decl *ParseModuleImport(SourceLocation AtLoc,
Sema::ModuleImportState &ImportState);
bool parseMisplacedModuleImport();
bool tryParseMisplacedModuleImport() {
tok::TokenKind Kind = Tok.getKind();

View File

@ -2949,11 +2949,24 @@ public:
Implementation, ///< 'module X;'
};
/// An enumeration to represent the transition of states in parsing module
/// fragments and imports. If we are not parsing a C++20 TU, or we find
/// an error in state transition, the state is set to NotACXX20Module.
enum class ModuleImportState {
FirstDecl, ///< Parsing the first decl in a TU.
GlobalFragment, ///< after 'module;' but before 'module X;'
ImportAllowed, ///< after 'module X;' but before any non-import decl.
ImportFinished, ///< after any non-import decl.
PrivateFragment, ///< after 'module :private;'.
NotACXX20Module ///< Not a C++20 TU, or an invalid state was found.
};
/// The parser has processed a module-declaration that begins the definition
/// of a module interface or implementation.
DeclGroupPtrTy ActOnModuleDecl(SourceLocation StartLoc,
SourceLocation ModuleLoc, ModuleDeclKind MDK,
ModuleIdPath Path, bool IsFirstDecl);
ModuleIdPath Path,
ModuleImportState &ImportState);
/// The parser has processed a global-module-fragment declaration that begins
/// the definition of the global module fragment of the current module unit.

View File

@ -164,8 +164,9 @@ IncrementalParser::ParseOrWrapTopLevelDecl() {
}
Parser::DeclGroupPtrTy ADecl;
for (bool AtEOF = P->ParseFirstTopLevelDecl(ADecl); !AtEOF;
AtEOF = P->ParseTopLevelDecl(ADecl)) {
Sema::ModuleImportState ImportState;
for (bool AtEOF = P->ParseFirstTopLevelDecl(ADecl, ImportState); !AtEOF;
AtEOF = P->ParseTopLevelDecl(ADecl, ImportState)) {
// If we got a null return and something *was* parsed, ignore it. This
// is due to a top-level semicolon, an action override, or a parse error
// skipping something.

View File

@ -154,8 +154,9 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) {
llvm::TimeTraceScope TimeScope("Frontend");
P.Initialize();
Parser::DeclGroupPtrTy ADecl;
for (bool AtEOF = P.ParseFirstTopLevelDecl(ADecl); !AtEOF;
AtEOF = P.ParseTopLevelDecl(ADecl)) {
Sema::ModuleImportState ImportState;
for (bool AtEOF = P.ParseFirstTopLevelDecl(ADecl, ImportState); !AtEOF;
AtEOF = P.ParseTopLevelDecl(ADecl, ImportState)) {
// If we got a null return and something *was* parsed, ignore it. This
// is due to a top-level semicolon, an action override, or a parse error
// skipping something.

View File

@ -79,7 +79,8 @@ Parser::ParseObjCAtDirectives(ParsedAttributesWithRange &Attrs) {
break;
case tok::objc_import:
if (getLangOpts().Modules || getLangOpts().DebuggerSupport) {
SingleDecl = ParseModuleImport(AtLoc);
Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
SingleDecl = ParseModuleImport(AtLoc, IS);
break;
}
Diag(AtLoc, diag::err_atimport);

View File

@ -581,15 +581,20 @@ void Parser::DestroyTemplateIds() {
/// top-level-declaration-seq[opt] private-module-fragment[opt]
///
/// Note that in C, it is an error if there is no first declaration.
bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) {
bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result,
Sema::ModuleImportState &ImportState) {
Actions.ActOnStartOfTranslationUnit();
// For C++20 modules, a module decl must be the first in the TU. We also
// need to track module imports.
ImportState = Sema::ModuleImportState::FirstDecl;
bool NoTopLevelDecls = ParseTopLevelDecl(Result, ImportState);
// C11 6.9p1 says translation units must have at least one top-level
// declaration. C++ doesn't have this restriction. We also don't want to
// complain if we have a precompiled header, although technically if the PCH
// is empty we should still emit the (pedantic) diagnostic.
// If the main file is a header, we're only pretending it's a TU; don't warn.
bool NoTopLevelDecls = ParseTopLevelDecl(Result, true);
if (NoTopLevelDecls && !Actions.getASTContext().getExternalSource() &&
!getLangOpts().CPlusPlus && !getLangOpts().IsHeaderFile)
Diag(diag::ext_empty_translation_unit);
@ -603,7 +608,8 @@ bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) {
/// top-level-declaration:
/// declaration
/// [C++20] module-import-declaration
bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, bool IsFirstDecl) {
bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
Sema::ModuleImportState &ImportState) {
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(*this);
// Skip over the EOF token, flagging end of previous input for incremental
@ -647,13 +653,12 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, bool IsFirstDecl) {
case tok::kw_module:
module_decl:
Result = ParseModuleDecl(IsFirstDecl);
Result = ParseModuleDecl(ImportState);
return false;
// tok::kw_import is handled by ParseExternalDeclaration. (Under the Modules
// TS, an import can occur within an export block.)
case tok::kw_import:
import_decl: {
Decl *ImportDecl = ParseModuleImport(SourceLocation());
Decl *ImportDecl = ParseModuleImport(SourceLocation(), ImportState);
Result = Actions.ConvertDeclToDeclGroup(ImportDecl);
return false;
}
@ -669,12 +674,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, bool IsFirstDecl) {
Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
ImportState = Sema::ModuleImportState::NotACXX20Module;
return false;
case tok::annot_module_end:
Actions.ActOnModuleEnd(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
ImportState = Sema::ModuleImportState::NotACXX20Module;
return false;
case tok::eof:
@ -718,6 +725,16 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, bool IsFirstDecl) {
MaybeParseCXX11Attributes(attrs);
Result = ParseExternalDeclaration(attrs);
// An empty Result might mean a line with ';' or some parsing error, ignore
// it.
if (Result) {
if (ImportState == Sema::ModuleImportState::FirstDecl)
// First decl was not modular.
ImportState = Sema::ModuleImportState::NotACXX20Module;
else if (ImportState == Sema::ModuleImportState::ImportAllowed)
// Non-imports disallow further imports.
ImportState = Sema::ModuleImportState::ImportFinished;
}
return false;
}
@ -887,11 +904,17 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
getCurScope(),
CurParsedObjCImpl ? Sema::PCC_ObjCImplementation : Sema::PCC_Namespace);
return nullptr;
case tok::kw_import:
SingleDecl = ParseModuleImport(SourceLocation());
break;
case tok::kw_import: {
Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
if (getLangOpts().CPlusPlusModules) {
llvm_unreachable("not expecting a c++20 import here");
ProhibitAttributes(attrs);
}
SingleDecl = ParseModuleImport(SourceLocation(), IS);
} break;
case tok::kw_export:
if (getLangOpts().CPlusPlusModules || getLangOpts().ModulesTS) {
ProhibitAttributes(attrs);
SingleDecl = ParseExportDeclaration();
break;
}
@ -2291,7 +2314,8 @@ void Parser::ParseMicrosoftIfExistsExternalDeclaration() {
/// attribute-specifier-seq[opt] ';'
/// private-module-fragment: [C++2a]
/// 'module' ':' 'private' ';' top-level-declaration-seq[opt]
Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
Parser::DeclGroupPtrTy
Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
SourceLocation StartLoc = Tok.getLocation();
Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export)
@ -2311,7 +2335,7 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
// Parse a global-module-fragment, if present.
if (getLangOpts().CPlusPlusModules && Tok.is(tok::semi)) {
SourceLocation SemiLoc = ConsumeToken();
if (!IsFirstDecl) {
if (ImportState != Sema::ModuleImportState::FirstDecl) {
Diag(StartLoc, diag::err_global_module_introducer_not_at_start)
<< SourceRange(StartLoc, SemiLoc);
return nullptr;
@ -2320,6 +2344,7 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
Diag(StartLoc, diag::err_module_fragment_exported)
<< /*global*/0 << FixItHint::CreateRemoval(StartLoc);
}
ImportState = Sema::ModuleImportState::GlobalFragment;
return Actions.ActOnGlobalModuleFragmentDecl(ModuleLoc);
}
@ -2334,6 +2359,7 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
SourceLocation PrivateLoc = ConsumeToken();
DiagnoseAndSkipCXX11Attributes();
ExpectAndConsumeSemi(diag::err_private_module_fragment_expected_semi);
ImportState = Sema::ModuleImportState::PrivateFragment;
return Actions.ActOnPrivateModuleFragmentDecl(ModuleLoc, PrivateLoc);
}
@ -2361,7 +2387,7 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
ExpectAndConsumeSemi(diag::err_module_expected_semi);
return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, IsFirstDecl);
return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, ImportState);
}
/// Parse a module import declaration. This is essentially the same for
@ -2379,7 +2405,8 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl(bool IsFirstDecl) {
/// attribute-specifier-seq[opt] ';'
/// 'export'[opt] 'import' header-name
/// attribute-specifier-seq[opt] ';'
Decl *Parser::ParseModuleImport(SourceLocation AtLoc) {
Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
Sema::ModuleImportState &ImportState) {
SourceLocation StartLoc = AtLoc.isInvalid() ? Tok.getLocation() : AtLoc;
SourceLocation ExportLoc;
@ -2428,6 +2455,42 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc) {
return nullptr;
}
// Diagnose mis-imports.
bool SeenError = true;
switch (ImportState) {
case Sema::ModuleImportState::ImportAllowed:
SeenError = false;
break;
case Sema::ModuleImportState::FirstDecl:
case Sema::ModuleImportState::NotACXX20Module:
// TODO: These cases will be an error when partitions are implemented.
SeenError = false;
break;
case Sema::ModuleImportState::GlobalFragment:
// We can only have pre-processor directives in the global module
// fragment. We can, however have a header unit import here.
if (!HeaderUnit)
// We do not have partition support yet, so first arg is 0.
Diag(ImportLoc, diag::err_import_in_wrong_fragment) << 0 << 0;
else
SeenError = false;
break;
case Sema::ModuleImportState::ImportFinished:
if (getLangOpts().CPlusPlusModules)
Diag(ImportLoc, diag::err_import_not_allowed_here);
else
SeenError = false;
break;
case Sema::ModuleImportState::PrivateFragment:
// We do not have partition support yet, so first arg is 0.
Diag(ImportLoc, diag::err_import_in_wrong_fragment) << 0 << 1;
break;
}
if (SeenError) {
ExpectAndConsumeSemi(diag::err_module_expected_semi);
return nullptr;
}
DeclResult Import;
if (HeaderUnit)
Import =

View File

@ -80,12 +80,20 @@ Sema::ActOnGlobalModuleFragmentDecl(SourceLocation ModuleLoc) {
return nullptr;
}
Sema::DeclGroupPtrTy
Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
ModuleDeclKind MDK, ModuleIdPath Path, bool IsFirstDecl) {
Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
SourceLocation ModuleLoc,
ModuleDeclKind MDK,
ModuleIdPath Path,
ModuleImportState &ImportState) {
assert((getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) &&
"should only have module decl in Modules TS or C++20");
bool IsFirstDecl = ImportState == ModuleImportState::FirstDecl;
bool SeenGMF = ImportState == ModuleImportState::GlobalFragment;
// If any of the steps here fail, we count that as invalidating C++20
// module state;
ImportState = ModuleImportState::NotACXX20Module;
// A module implementation unit requires that we are not compiling a module
// of any kind. A module interface unit requires that we are not compiling a
// module map.
@ -134,9 +142,13 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
ModuleScopes.back().Module->Kind == Module::GlobalModuleFragment)
GlobalModuleFragment = ModuleScopes.back().Module;
assert((!getLangOpts().CPlusPlusModules ||
SeenGMF == (bool)GlobalModuleFragment) &&
"mismatched global module state");
// In C++20, the module-declaration must be the first declaration if there
// is no global module fragment.
if (getLangOpts().CPlusPlusModules && !IsFirstDecl && !GlobalModuleFragment) {
if (getLangOpts().CPlusPlusModules && !IsFirstDecl && !SeenGMF) {
Diag(ModuleLoc, diag::err_module_decl_not_at_start);
SourceLocation BeginLoc =
ModuleScopes.empty()
@ -231,6 +243,10 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
TU->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate);
TU->setLocalOwningModule(Mod);
// We are in the module purview, but before any other (non import)
// statements, so imports are allowed.
ImportState = ModuleImportState::ImportAllowed;
// FIXME: Create a ModuleDecl.
return nullptr;
}
@ -301,10 +317,10 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
SourceLocation ExportLoc,
SourceLocation ImportLoc,
ModuleIdPath Path) {
// Flatten the module path for a Modules TS module name.
// Flatten the module path for a C++20 or Modules TS module name.
std::pair<IdentifierInfo *, SourceLocation> ModuleNameLoc;
if (getLangOpts().ModulesTS) {
std::string ModuleName;
if (getLangOpts().CPlusPlusModules || getLangOpts().ModulesTS) {
for (auto &Piece : Path) {
if (!ModuleName.empty())
ModuleName += ".";
@ -314,6 +330,14 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
Path = ModuleIdPath(ModuleNameLoc);
}
// Diagnose self-import before attempting a load.
if (getLangOpts().CPlusPlusModules && isCurrentModulePurview() &&
getCurrentModule()->Name == ModuleName) {
Diag(ImportLoc, diag::err_module_self_import)
<< ModuleName << getLangOpts().CurrentModule;
return true;
}
Module *Mod =
getModuleLoader().loadModule(ImportLoc, Path, Module::AllVisible,
/*IsInclusionDirective=*/false);
@ -342,11 +366,9 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
// FIXME: we should support importing a submodule within a different submodule
// of the same top-level module. Until we do, make it an error rather than
// silently ignoring the import.
// Import-from-implementation is valid in the Modules TS. FIXME: Should we
// warn on a redundant import of the current module?
// FIXME: Import of a module from an implementation partition of the same
// module is permitted.
if (Mod->getTopLevelModuleName() == getLangOpts().CurrentModule &&
// FIXME: Should we warn on a redundant import of the current module?
if (!getLangOpts().CPlusPlusModules &&
Mod->getTopLevelModuleName() == getLangOpts().CurrentModule &&
(getLangOpts().isCompilingModule() || !getLangOpts().ModulesTS)) {
Diag(ImportLoc, getLangOpts().isCompilingModule()
? diag::err_module_self_import

View File

@ -0,0 +1,140 @@
// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=0 -x c++ %s \
// RUN: -o %t/B.pcm
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=1 -x c++ %s \
// RUN: -o %t/C.pcm
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=2 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -fmodule-file=%t/C.pcm -o %t/AOK1.pcm
// RUN: %clang_cc1 -std=c++20 -S -D TU=3 -x c++ %s \
// RUN: -fmodule-file=%t/AOK1.pcm -o %t/tu_3.s -verify
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=4 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -fmodule-file=%t/C.pcm -o %t/BC.pcm -verify
// RUN: %clang_cc1 -std=c++20 -S -D TU=5 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -fmodule-file=%t/C.pcm -o %t/tu_5.s -verify
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=6 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -o %t/D.pcm -verify
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=7 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -o %t/D.pcm -verify
// RUN: %clang_cc1 -std=c++20 -S -D TU=8 -x c++ %s \
// RUN: -fmodule-file=%t/B.pcm -o %t/tu_8.s -verify
// RUN: %clang_cc1 -std=c++20 -emit-module-interface -D TU=9 -x c++ %s \
// RUN: -o %t/B.pcm -verify
// RUN: %clang_cc1 -std=c++20 -emit-obj -D TU=10 -x c++ %s \
// RUN: -fmodule-file=%t/C.pcm -o %t/impl.o
// Test diagnostics for incorrect module import sequences.
#if TU == 0
export module B;
int foo ();
// expected-no-diagnostics
#elif TU == 1
export module C;
int bar ();
// expected-no-diagnostics
#elif TU == 2
export module AOK1;
import B;
export import C;
export int theAnswer ();
// expected-no-diagnostics
#elif TU == 3
module;
module AOK1;
export import C; // expected-error {{export declaration can only be used within a module interface unit}}
int theAnswer () { return 42; }
#elif TU == 4
export module BC;
export import B;
int foo () { return 10; }
import C; // expected-error {{imports must immediately follow the module declaration}}
#elif TU == 5
module B; // implicitly imports B.
int foo () { return 10; }
import C; // expected-error {{imports must immediately follow the module declaration}}
#elif TU == 6
module;
// We can only have preprocessor commands here, which could include an include
// translated header unit. However those are identified specifically by the
// preprocessor; non-preprocessed user code should not contain an import here.
import B; // expected-error {{module imports cannot be in the global module fragment}}
export module D;
int delta ();
#elif TU == 7
export module D;
int delta ();
module :private;
import B; // expected-error {{module imports cannot be in the private module fragment}}
#elif TU == 8
module B;
import B; // expected-error {{import of module 'B' appears within same top-level module 'B'}}
#elif TU == 9
export module B;
import B; // expected-error {{import of module 'B' appears within same top-level module 'B'}}
#elif TU == 10
int x;
import C;
int baz() { return 6174; }
// expected-no-diagnostics
#else
#error "no MODE set"
#endif