clang-format: [JS] nested and tagged template strings.

JavaScript template strings can be nested arbitrarily:

    foo = `text ${es.map(e => { return `<${e}>`; })} text`;

This change lexes nested template strings using a stack of lexer states to
correctly switch back to template string lexing on closing braces.

Also, reuse the same stack for the token-stashed logic.

Reviewers: djasper

Subscribers: cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D22431

llvm-svn: 279727
This commit is contained in:
Martin Probst 2016-08-25 10:13:21 +00:00
parent 86ce267a4a
commit 6181da4796
4 changed files with 84 additions and 24 deletions

View File

@ -26,12 +26,11 @@ namespace format {
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
const FormatStyle &Style,
encoding::Encoding Encoding)
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
LessStashed(false), Column(0), TrailingWhitespace(0),
SourceMgr(SourceMgr), ID(ID), Style(Style),
IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
MacroBlockBeginRegex(Style.MacroBlockBegin),
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(getFormattingLangOpts(Style)),
Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
Tokens.push_back(getNextToken());
if (Style.Language == FormatStyle::LK_JavaScript) {
tryParseJSRegexLiteral();
tryParseTemplateString();
handleTemplateStrings();
}
tryMergePreviousTokens();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
void FormatTokenLexer::tryParseTemplateString() {
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
if (BacktickToken->is(tok::l_brace)) {
StateStack.push(LexerState::NORMAL);
return;
}
if (BacktickToken->is(tok::r_brace)) {
StateStack.pop();
if (StateStack.top() != LexerState::TEMPLATE_STRING)
return;
// If back in TEMPLATE_STRING, fall through and continue lexing the rest of
// the template string.
} else if (BacktickToken->is(tok::unknown) &&
BacktickToken->TokenText == "`") {
StateStack.push(LexerState::TEMPLATE_STRING);
} else {
return; // Not actually a template
}
// 'Manually' lex ahead in the current file buffer.
const char *Offset = Lex->getBufferLocation();
const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
if (*Offset == '\\')
for (; Offset != Lex->getBuffer().end(); ++Offset) {
if (Offset[0] == '`') {
StateStack.pop();
break;
}
if (Offset[0] == '\\') {
++Offset; // Skip the escaped character.
} else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
Offset[1] == '{') {
// '${' introduces an expression interpolation in the template string.
StateStack.push(LexerState::NORMAL);
++Offset;
break;
}
}
StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateString() {
Style.TabWidth, Encoding);
}
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
SourceLocation loc = Offset < Lex->getBuffer().end()
? Lex->getSourceLocation(Offset + 1)
: SourceMgr.getLocForEndOfFile(ID);
resetLexer(SourceMgr.getFileOffset(loc));
}
bool FormatTokenLexer::tryMerge_TMacro() {
@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashedToken() {
}
FormatToken *FormatTokenLexer::getNextToken() {
if (GreaterStashed) {
GreaterStashed = false;
return getStashedToken();
}
if (LessStashed) {
LessStashed = false;
if (StateStack.top() == LexerState::TOKEN_STASHED) {
StateStack.pop();
return getStashedToken();
}
@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextToken() {
} else if (FormatTok->Tok.is(tok::greatergreater)) {
FormatTok->Tok.setKind(tok::greater);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
GreaterStashed = true;
StateStack.push(LexerState::TOKEN_STASHED);
} else if (FormatTok->Tok.is(tok::lessless)) {
FormatTok->Tok.setKind(tok::less);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
LessStashed = true;
StateStack.push(LexerState::TOKEN_STASHED);
}
// Now FormatTok is the next non-whitespace token.

View File

@ -23,9 +23,17 @@
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <stack>
namespace clang {
namespace format {
// Lexing contexts tracked on FormatTokenLexer's StateStack.
enum LexerState {
// Regular token lexing. This is the initial state; it is also pushed on an
// opening '{' and on a '${' found while scanning a template string.
NORMAL,
// Pushed when a backtick opens a template string; popped when the closing
// backtick is found.
TEMPLATE_STRING,
// Pushed when a '>>' or '<<' token is split in two; getNextToken() pops it
// and returns the stashed token.
TOKEN_STASHED,
};
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
@ -53,7 +61,16 @@ private:
// its text if successful.
void tryParseJSRegexLiteral();
void tryParseTemplateString();
// Handles JavaScript template strings.
//
// JavaScript template strings use backticks ('`') as delimiters, and allow
// embedding expressions nested in ${expr-here}. Template strings can be
// nested recursively, i.e. expressions can contain template strings in turn.
//
// The code below parses starting from a backtick, up to a closing backtick or
// an opening ${. It also maintains a stack of lexing contexts to handle
// nested template parts by balancing curly braces.
void handleTemplateStrings();
bool tryMerge_TMacro();
@ -65,7 +82,7 @@ private:
FormatToken *FormatTok;
bool IsFirstToken;
bool GreaterStashed, LessStashed;
std::stack<LexerState> StateStack;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;

View File

@ -858,7 +858,7 @@ private:
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
TT_RegexLiteral))
TT_RegexLiteral, TT_TemplateString))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
return 100;
if (Left.is(TT_JsTypeColon))
return 35;
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return 100;
}
if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return false;
if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
return false;

View File

@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) {
TEST_F(FormatTestJS, TemplateStrings) {
// Keeps any whitespace/indentation within the template string.
verifyFormat("var x = `hello\n"
" ${ name }\n"
" ${name}\n"
" !`;",
"var x = `hello\n"
" ${ name }\n"
@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) {
"var y;",
"var x = ` \\` a`;\n"
"var y;");
// Escaped dollar.
verifyFormat("var x = ` \\${foo}`;\n");
}
// Covers template strings that appear inside the ${...} expression of another
// template string, and an interpolation whose expression contains its own
// curly braces (an object literal).
TEST_F(FormatTestJS, NestedTemplateStrings) {
verifyFormat(
"var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
}
// Covers a tagged template string: an identifier immediately followed by a
// template string, with no space between the two.
TEST_F(FormatTestJS, TaggedTemplateStrings) {
verifyFormat("var x = html`<ul>`;");
}
TEST_F(FormatTestJS, CastSyntax) {