[pseudo] Eliminate the false `::` nested-name-specifier ambiguity

The solution is to favor the longest possible nested-name-specifier, and
drop other alternatives by using the guard, per C++ [basic.lookup.qual.general].

Motivated cases:

```
Foo::Foo() {};
// the constructor can be parsed as:
//  - Foo ::Foo(); // where the first Foo is return-type, and ::Foo is the function declarator
//  + Foo::Foo(); // where Foo::Foo is the function declarator
```

```
void test() {

// a very slow parsing case when there are many qualifiers!
X::Y::Z;
// The statement can be parsed as:
//  - X ::Y::Z; // ::Y::Z is the declarator
//  - X::Y ::Z; // ::Z is the declarator
//  + X::Y::Z;  // a declaration without declarator (X::Y::Z is decl-specifier-seq)
//  + X::Y::Z;  // a qualified-id expression
}
```

Differential Revision: https://reviews.llvm.org/D130511
This commit is contained in:
Haojian Wu 2022-07-26 22:27:09 +02:00
parent dc95d0c525
commit 6f6c40a875
4 changed files with 42 additions and 1 deletions

View File

@ -90,6 +90,11 @@ struct Token {
while (T->Kind == tok::comment);
return *T;
}
/// Returns the token stored immediately before this one in the stream.
/// This token must not be an eof sentinel; the assertion below enforces it.
const Token &prev() const {
  assert(Kind != tok::eof);
  const Token *Before = this - 1;
  return *Before;
}
/// Returns the bracket paired with this one, or nullptr if there is none.
/// Pair is applied as an offset from this token; zero means "no pair".
const Token *pair() const {
  if (Pair == 0)
    return nullptr;
  return this + Pair;
}

View File

@ -312,6 +312,14 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
IF__CONSTEXPR__L_PAREN__init_statement__condition__R_PAREN__statement,
guardNextTokenNotElse},
// Implement C++ [basic.lookup.qual.general]:
// If a name, template-id, or decltype-specifier is followed by a
// ::, it shall designate a namespace, class, enumeration, or
// dependent type, and the :: is never interpreted as a complete
// nested-name-specifier.
{rule::nested_name_specifier::COLONCOLON,
TOKEN_GUARD(coloncolon, Tok.prev().Kind != tok::identifier)},
// The grammar distinguishes (only) user-defined vs plain string literals,
// where the clang lexer distinguishes (only) encoding types.
{rule::user_defined_string_literal_chunk::STRING_LITERAL,

View File

@ -68,7 +68,7 @@ unqualified-id := ~ type-name
unqualified-id := ~ decltype-specifier
unqualified-id := template-id
qualified-id := nested-name-specifier TEMPLATE_opt unqualified-id
nested-name-specifier := ::
nested-name-specifier := :: [guard]
nested-name-specifier := type-name ::
nested-name-specifier := namespace-name ::
nested-name-specifier := decltype-specifier ::

View File

@ -0,0 +1,28 @@
// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
// Verify that we don't form a complete `::` nested-name-specifier if there is
// an identifier preceding it.
Foo::Foo() {} // No "Foo ::Foo()" false parse
// CHECK: ├─declaration-seq~function-definition := function-declarator function-body
// CHECK-NEXT: │ ├─function-declarator~noptr-declarator := noptr-declarator parameters-and-qualifiers
int ::x;
// CHECK: declaration~simple-declaration := decl-specifier-seq init-declarator-list ;
// CHECK-NEXT: ├─decl-specifier-seq~INT
// Statement-context cases: qualified names written as statements inside a
// function body, where both expression and declaration parses are possible.
void test() {
X::Y::Z; // No false qualified-declarator parses "X ::Y::Z" and "X::Y ::Z".
// CHECK: statement-seq~statement := <ambiguous>
// CHECK: statement~expression-statement := expression ;
// CHECK: statement~simple-declaration := decl-specifier-seq ;
// CHECK-NOT: simple-declaration := decl-specifier-seq init-declarator-list ;
// FIXME: eliminate the false `a<b> ::c` declaration parse.
// Here the token before `::` is `>`, not an identifier, so the expectations
// below still include the declaration-with-declarator alternative.
a<b>::c;
// CHECK: statement := <ambiguous>
// CHECK-NEXT: ├─statement~expression-statement := expression ;
// CHECK-NEXT: │ ├─expression~relational-expression :=
// CHECK: └─statement~simple-declaration := <ambiguous>
// CHECK-NEXT: ├─simple-declaration := decl-specifier-seq ;
// CHECK: └─simple-declaration := decl-specifier-seq init-declarator-list ;
}