forked from OSchip/llvm-project
clang-format: [JS] nested and tagged template strings.
JavaScript template strings can be nested arbitrarily:

    foo = `text ${es.map(e => { return `<${e}>`; })} text`;

This change lexes nested template strings using a stack of lexer states, so that the lexer correctly switches back to template string lexing on closing braces. It also reuses the same stack for the token-stashed logic.

Reviewers: djasper
Subscribers: cfe-commits, klimek
Differential Revision: https://reviews.llvm.org/D22431
llvm-svn: 279727
This commit is contained in:
parent
86ce267a4a
commit
6181da4796
|
@ -26,12 +26,11 @@ namespace format {
|
|||
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
|
||||
const FormatStyle &Style,
|
||||
encoding::Encoding Encoding)
|
||||
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
|
||||
LessStashed(false), Column(0), TrailingWhitespace(0),
|
||||
SourceMgr(SourceMgr), ID(ID), Style(Style),
|
||||
IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
|
||||
Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
|
||||
MacroBlockBeginRegex(Style.MacroBlockBegin),
|
||||
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
|
||||
Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
|
||||
Style(Style), IdentTable(getFormattingLangOpts(Style)),
|
||||
Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
|
||||
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
|
||||
MacroBlockEndRegex(Style.MacroBlockEnd) {
|
||||
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
|
||||
getFormattingLangOpts(Style)));
|
||||
|
@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
|
|||
Tokens.push_back(getNextToken());
|
||||
if (Style.Language == FormatStyle::LK_JavaScript) {
|
||||
tryParseJSRegexLiteral();
|
||||
tryParseTemplateString();
|
||||
handleTemplateStrings();
|
||||
}
|
||||
tryMergePreviousTokens();
|
||||
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
|
||||
|
@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
|
|||
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
|
||||
}
|
||||
|
||||
void FormatTokenLexer::tryParseTemplateString() {
|
||||
void FormatTokenLexer::handleTemplateStrings() {
|
||||
FormatToken *BacktickToken = Tokens.back();
|
||||
if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
|
||||
|
||||
if (BacktickToken->is(tok::l_brace)) {
|
||||
StateStack.push(LexerState::NORMAL);
|
||||
return;
|
||||
}
|
||||
if (BacktickToken->is(tok::r_brace)) {
|
||||
StateStack.pop();
|
||||
if (StateStack.top() != LexerState::TEMPLATE_STRING)
|
||||
return;
|
||||
// If back in TEMPLATE_STRING, fall through and continue parsing the template string.
|
||||
} else if (BacktickToken->is(tok::unknown) &&
|
||||
BacktickToken->TokenText == "`") {
|
||||
StateStack.push(LexerState::TEMPLATE_STRING);
|
||||
} else {
|
||||
return; // Not actually a template
|
||||
}
|
||||
|
||||
// 'Manually' lex ahead in the current file buffer.
|
||||
const char *Offset = Lex->getBufferLocation();
|
||||
const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
|
||||
for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
|
||||
if (*Offset == '\\')
|
||||
for (; Offset != Lex->getBuffer().end(); ++Offset) {
|
||||
if (Offset[0] == '`') {
|
||||
StateStack.pop();
|
||||
break;
|
||||
}
|
||||
if (Offset[0] == '\\') {
|
||||
++Offset; // Skip the escaped character.
|
||||
} else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
|
||||
Offset[1] == '{') {
|
||||
// '${' introduces an expression interpolation in the template string.
|
||||
StateStack.push(LexerState::NORMAL);
|
||||
++Offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
|
||||
|
@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateString() {
|
|||
Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
|
||||
SourceLocation loc = Offset < Lex->getBuffer().end()
|
||||
? Lex->getSourceLocation(Offset + 1)
|
||||
: SourceMgr.getLocForEndOfFile(ID);
|
||||
resetLexer(SourceMgr.getFileOffset(loc));
|
||||
}
|
||||
|
||||
bool FormatTokenLexer::tryMerge_TMacro() {
|
||||
|
@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashedToken() {
|
|||
}
|
||||
|
||||
FormatToken *FormatTokenLexer::getNextToken() {
|
||||
if (GreaterStashed) {
|
||||
GreaterStashed = false;
|
||||
return getStashedToken();
|
||||
}
|
||||
if (LessStashed) {
|
||||
LessStashed = false;
|
||||
if (StateStack.top() == LexerState::TOKEN_STASHED) {
|
||||
StateStack.pop();
|
||||
return getStashedToken();
|
||||
}
|
||||
|
||||
|
@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextToken() {
|
|||
} else if (FormatTok->Tok.is(tok::greatergreater)) {
|
||||
FormatTok->Tok.setKind(tok::greater);
|
||||
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
|
||||
GreaterStashed = true;
|
||||
StateStack.push(LexerState::TOKEN_STASHED);
|
||||
} else if (FormatTok->Tok.is(tok::lessless)) {
|
||||
FormatTok->Tok.setKind(tok::less);
|
||||
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
|
||||
LessStashed = true;
|
||||
StateStack.push(LexerState::TOKEN_STASHED);
|
||||
}
|
||||
|
||||
// Now FormatTok is the next non-whitespace token.
|
||||
|
|
|
@ -23,9 +23,17 @@
|
|||
#include "clang/Format/Format.h"
|
||||
#include "llvm/Support/Regex.h"
|
||||
|
||||
#include <stack>
|
||||
|
||||
namespace clang {
|
||||
namespace format {
|
||||
|
||||
enum LexerState {
|
||||
NORMAL,
|
||||
TEMPLATE_STRING,
|
||||
TOKEN_STASHED,
|
||||
};
|
||||
|
||||
class FormatTokenLexer {
|
||||
public:
|
||||
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
|
||||
|
@ -53,7 +61,16 @@ private:
|
|||
// its text if successful.
|
||||
void tryParseJSRegexLiteral();
|
||||
|
||||
void tryParseTemplateString();
|
||||
// Handles JavaScript template strings.
|
||||
//
|
||||
// JavaScript template strings use backticks ('`') as delimiters, and allow
|
||||
// embedding expressions nested in ${expr-here}. Template strings can be
|
||||
// nested recursively, i.e. expressions can contain template strings in turn.
|
||||
//
|
||||
// The code below parses starting from a backtick, up to a closing backtick or
|
||||
// an opening ${. It also maintains a stack of lexing contexts to handle
|
||||
// nested template parts by balancing curly braces.
|
||||
void handleTemplateStrings();
|
||||
|
||||
bool tryMerge_TMacro();
|
||||
|
||||
|
@ -65,7 +82,7 @@ private:
|
|||
|
||||
FormatToken *FormatTok;
|
||||
bool IsFirstToken;
|
||||
bool GreaterStashed, LessStashed;
|
||||
std::stack<LexerState> StateStack;
|
||||
unsigned Column;
|
||||
unsigned TrailingWhitespace;
|
||||
std::unique_ptr<Lexer> Lex;
|
||||
|
|
|
@ -858,7 +858,7 @@ private:
|
|||
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
|
||||
TT_FunctionLBrace, TT_ImplicitStringLiteral,
|
||||
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
|
||||
TT_RegexLiteral))
|
||||
TT_RegexLiteral, TT_TemplateString))
|
||||
CurrentToken->Type = TT_Unknown;
|
||||
CurrentToken->Role.reset();
|
||||
CurrentToken->MatchingParen = nullptr;
|
||||
|
@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
|
|||
return 100;
|
||||
if (Left.is(TT_JsTypeColon))
|
||||
return 35;
|
||||
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
|
||||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
|
||||
return 100;
|
||||
}
|
||||
|
||||
if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
|
||||
|
@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
|
|||
} else if (Style.Language == FormatStyle::LK_JavaScript) {
|
||||
if (Left.is(TT_JsFatArrow))
|
||||
return true;
|
||||
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
|
||||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
|
||||
return false;
|
||||
if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
|
||||
return false;
|
||||
if (Right.is(tok::star) &&
|
||||
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
|
||||
return false;
|
||||
|
|
|
@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) {
|
|||
TEST_F(FormatTestJS, TemplateStrings) {
|
||||
// Keeps any whitespace/indentation within the template string.
|
||||
verifyFormat("var x = `hello\n"
|
||||
" ${ name }\n"
|
||||
" ${name}\n"
|
||||
" !`;",
|
||||
"var x = `hello\n"
|
||||
" ${ name }\n"
|
||||
|
@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) {
|
|||
"var y;",
|
||||
"var x = ` \\` a`;\n"
|
||||
"var y;");
|
||||
// Escaped dollar.
|
||||
verifyFormat("var x = ` \\${foo}`;\n");
|
||||
}
|
||||
|
||||
TEST_F(FormatTestJS, NestedTemplateStrings) {
|
||||
verifyFormat(
|
||||
"var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
|
||||
verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
|
||||
}
|
||||
|
||||
TEST_F(FormatTestJS, TaggedTemplateStrings) {
|
||||
verifyFormat("var x = html`<ul>`;");
|
||||
}
|
||||
|
||||
TEST_F(FormatTestJS, CastSyntax) {
|
||||
|
|
Loading…
Reference in New Issue