forked from OSchip/llvm-project
[pseudo] Split greatergreater token.
For a >> token (which may be either a right-shift operator or the end of two nested template argument lists), the clang lexer always returns a single greatergreater token. As a result, the grammar-based GLR parser never tries to parse the nested-template case. We derive a token stream by always splitting the >> token, so that the GLR parser is able to pursue both options during parsing (usually one path fails). Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D121678
This commit is contained in:
parent
2cdf5ef136
commit
f66d3758bd
|
@ -180,7 +180,8 @@ enum class LexFlags : uint8_t {
|
|||
NeedsCleaning = 1 << 1,
|
||||
};
|
||||
|
||||
/// Derives a token stream by decoding escapes and interpreting raw_identifiers.
|
||||
/// Derives a token stream by decoding escapes, interpreting raw_identifiers and
|
||||
/// splitting the greatergreater token.
|
||||
///
|
||||
/// Tokens containing UCNs, escaped newlines, trigraphs etc are decoded and
|
||||
/// their backing data is owned by the returned stream.
|
||||
|
|
|
@ -98,9 +98,21 @@ TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts) {
|
|||
Tok.Length = Text.size();
|
||||
Tok.Flags &= ~static_cast<decltype(Tok.Flags)>(LexFlags::NeedsCleaning);
|
||||
}
|
||||
// Cook raw_identifiers into identifier, keyword, etc.
|
||||
if (Tok.Kind == tok::raw_identifier)
|
||||
|
||||
if (Tok.Kind == tok::raw_identifier) {
|
||||
// Cook raw_identifiers into identifier, keyword, etc.
|
||||
Tok.Kind = Identifiers.get(Tok.text()).getTokenID();
|
||||
} else if (Tok.Kind == tok::greatergreater) {
|
||||
// Split the greatergreater token.
|
||||
// FIXME: split lessless token to support Cuda triple angle brackets <<<.
|
||||
assert(Tok.text() == ">>");
|
||||
Tok.Kind = tok::greater;
|
||||
Tok.Length = 1;
|
||||
Result.push(Tok);
|
||||
// Line is wrong if the first greater is followed by an escaped newline!
|
||||
Tok.Data = Tok.text().data() + 1;
|
||||
}
|
||||
|
||||
Result.push(std::move(Tok));
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
# - the file merely describes the core C++ grammar. Preprocessor directives and
|
||||
# lexical conversions are omitted as we reuse clang's lexer and run a fake
|
||||
# preprocessor;
|
||||
# - grammar rules with the >> token are adjusted, the greatergreater token is
|
||||
# split into two > tokens, to make the GLR parser aware of nested templates
|
||||
# and right shift operator;
|
||||
#
|
||||
# Guidelines:
|
||||
# - non-terminals are lower_case; terminals (aka tokens) correspond to
|
||||
|
@ -96,7 +99,7 @@ fold-operator := %
|
|||
fold-operator := ^
|
||||
fold-operator := |
|
||||
fold-operator := <<
|
||||
fold-operator := >>
|
||||
fold-operator := greatergreater
|
||||
fold-operator := +=
|
||||
fold-operator := -=
|
||||
fold-operator := *=
|
||||
|
@ -202,7 +205,7 @@ additive-expression := additive-expression - multiplicative-expression
|
|||
# expr.shift
|
||||
shift-expression := additive-expression
|
||||
shift-expression := shift-expression << additive-expression
|
||||
shift-expression := shift-expression >> additive-expression
|
||||
shift-expression := shift-expression greatergreater additive-expression
|
||||
# expr.spaceship
|
||||
compare-expression := shift-expression
|
||||
compare-expression := compare-expression <=> shift-expression
|
||||
|
@ -615,7 +618,7 @@ operator-name := <=>
|
|||
operator-name := ^^
|
||||
operator-name := ||
|
||||
operator-name := <<
|
||||
operator-name := >>
|
||||
operator-name := greatergreater
|
||||
operator-name := <<=
|
||||
operator-name := >>=
|
||||
operator-name := ++
|
||||
|
@ -737,3 +740,8 @@ contextual-zero := NUMERIC_CONSTANT
|
|||
module-keyword := IDENTIFIER
|
||||
import-keyword := IDENTIFIER
|
||||
export-keyword := IDENTIFIER
|
||||
|
||||
#! greatergreater token -- clang lexer always lexes it as a single token, we
|
||||
#! split it into two tokens to make the GLR parser aware of the nested-template
|
||||
#! case.
|
||||
greatergreater := > >
|
||||
|
|
|
@ -171,6 +171,25 @@ no_indent \
|
|||
}));
|
||||
}
|
||||
|
||||
TEST(TokenTest, SplitGreaterGreater) {
|
||||
LangOptions Opts;
|
||||
std::string Code = R"cpp(
|
||||
>> // split
|
||||
// >> with an escaped newline in the middle, split
|
||||
>\
|
||||
>
|
||||
>>= // not split
|
||||
)cpp";
|
||||
TokenStream Split = stripComments(cook(lex(Code, Opts), Opts));
|
||||
EXPECT_THAT(Split.tokens(), ElementsAreArray({
|
||||
token(">", tok::greater),
|
||||
token(">", tok::greater),
|
||||
token(">", tok::greater),
|
||||
token(">", tok::greater),
|
||||
token(">>=", tok::greatergreaterequal),
|
||||
}));
|
||||
}
|
||||
|
||||
TEST(TokenTest, DropComments) {
|
||||
LangOptions Opts;
|
||||
std::string Code = R"cpp(
|
||||
|
|
Loading…
Reference in New Issue