clang-format: [JS] Support regex literals with trailing escaped slash.

Before:
  var regex = / a\//; int i;

After:
  var regex = /a\//;
  int i;

This required pushing the Lexer into its wrapper class and generating a
new one in this specific case. Otherwise, the sequence gets lexed as a
//-comment. This is hacky, but I don't know a better way (short of
supporting regex literals in the Lexer).

Pushing the Lexer down seems to make all the call sites simpler.

llvm-svn: 217444
This commit is contained in:
Daniel Jasper 2014-09-09 14:37:39 +00:00
parent 7fc29546f9
commit 23376259c0
5 changed files with 71 additions and 38 deletions

View File

@ -487,14 +487,21 @@ std::string configurationAsText(const FormatStyle &Style);
/// \brief Reformats the given \p Ranges in the token stream coming out of /// \brief Reformats the given \p Ranges in the token stream coming out of
/// \c Lex. /// \c Lex.
/// ///
/// DEPRECATED: Do not use.
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
SourceManager &SourceMgr,
std::vector<CharSourceRange> Ranges);
/// \brief Reformats the given \p Ranges in the file \p ID.
///
/// Each range is extended on either end to its next bigger logic unit, i.e. /// Each range is extended on either end to its next bigger logic unit, i.e.
/// everything that might influence its formatting or might be influenced by its /// everything that might influence its formatting or might be influenced by its
/// formatting. /// formatting.
/// ///
/// Returns the \c Replacements necessary to make all \p Ranges comply with /// Returns the \c Replacements necessary to make all \p Ranges comply with
/// \p Style. /// \p Style.
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, tooling::Replacements reformat(const FormatStyle &Style,
SourceManager &SourceMgr, SourceManager &SourceMgr, FileID ID,
std::vector<CharSourceRange> Ranges); std::vector<CharSourceRange> Ranges);
/// \brief Reformats the given \p Ranges in \p Code. /// \brief Reformats the given \p Ranges in \p Code.

View File

@ -1273,13 +1273,16 @@ private:
class FormatTokenLexer { class FormatTokenLexer {
public: public:
FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style, FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
encoding::Encoding Encoding) encoding::Encoding Encoding)
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Column(0), TrailingWhitespace(0),
Style(Style), IdentTable(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID), Style(Style),
Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) { IdentTable(getFormattingLangOpts(Style)), Encoding(Encoding),
Lex.SetKeepWhitespaceMode(true); FirstInLineIndex(0), FormattingDisabled(false) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
Lex->SetKeepWhitespaceMode(true);
for (const std::string &ForEachMacro : Style.ForEachMacros) for (const std::string &ForEachMacro : Style.ForEachMacros)
ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
@ -1308,10 +1311,10 @@ private:
return; return;
if (Style.Language == FormatStyle::LK_JavaScript) { if (Style.Language == FormatStyle::LK_JavaScript) {
if (tryMergeEscapeSequence())
return;
if (tryMergeJSRegexLiteral()) if (tryMergeJSRegexLiteral())
return; return;
if (tryMergeEscapeSequence())
return;
static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
@ -1376,9 +1379,18 @@ private:
// "(;,{}![:?", a binary operator or 'return', as those cannot be followed by // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
// a division. // a division.
bool tryMergeJSRegexLiteral() { bool tryMergeJSRegexLiteral() {
if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) || if (Tokens.size() < 2)
(Tokens[Tokens.size() - 2]->is(tok::unknown) && return false;
Tokens[Tokens.size() - 2]->TokenText == "\\")) // If a regex literal ends in "\//", this gets represented by an unknown
// token "\" and a comment.
bool MightEndWithEscapedSlash =
Tokens.back()->is(tok::comment) &&
Tokens.back()->TokenText.startswith("//") &&
Tokens[Tokens.size() - 2]->TokenText == "\\";
if (!MightEndWithEscapedSlash &&
(Tokens.back()->isNot(tok::slash) ||
(Tokens[Tokens.size() - 2]->is(tok::unknown) &&
Tokens[Tokens.size() - 2]->TokenText == "\\")))
return false; return false;
unsigned TokenCount = 0; unsigned TokenCount = 0;
unsigned LastColumn = Tokens.back()->OriginalColumn; unsigned LastColumn = Tokens.back()->OriginalColumn;
@ -1389,6 +1401,17 @@ private:
tok::exclaim, tok::l_square, tok::colon, tok::comma, tok::exclaim, tok::l_square, tok::colon, tok::comma,
tok::question, tok::kw_return) || tok::question, tok::kw_return) ||
I[1]->isBinaryOperator())) { I[1]->isBinaryOperator())) {
if (MightEndWithEscapedSlash) {
StringRef Buffer = SourceMgr.getBufferData(ID);
// This regex literal ends in '\//'. Skip past the '//' of the last
// token and re-start lexing from there.
int offset =
SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 2;
Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
getFormattingLangOpts(Style), Buffer.begin(),
Buffer.begin() + offset, Buffer.end()));
Lex->SetKeepWhitespaceMode(true);
}
Tokens.resize(Tokens.size() - TokenCount); Tokens.resize(Tokens.size() - TokenCount);
Tokens.back()->Tok.setKind(tok::unknown); Tokens.back()->Tok.setKind(tok::unknown);
Tokens.back()->Type = TT_RegexLiteral; Tokens.back()->Type = TT_RegexLiteral;
@ -1641,8 +1664,9 @@ private:
bool GreaterStashed; bool GreaterStashed;
unsigned Column; unsigned Column;
unsigned TrailingWhitespace; unsigned TrailingWhitespace;
Lexer &Lex; std::unique_ptr<Lexer> Lex;
SourceManager &SourceMgr; SourceManager &SourceMgr;
FileID ID;
FormatStyle &Style; FormatStyle &Style;
IdentifierTable IdentTable; IdentifierTable IdentTable;
encoding::Encoding Encoding; encoding::Encoding Encoding;
@ -1655,7 +1679,7 @@ private:
bool FormattingDisabled; bool FormattingDisabled;
void readRawToken(FormatToken &Tok) { void readRawToken(FormatToken &Tok) {
Lex.LexFromRawLexer(Tok.Tok); Lex->LexFromRawLexer(Tok.Tok);
Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
Tok.Tok.getLength()); Tok.Tok.getLength());
// For formatting, treat unterminated string literals like normal string // For formatting, treat unterminated string literals like normal string
@ -1692,12 +1716,13 @@ static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
class Formatter : public UnwrappedLineConsumer { class Formatter : public UnwrappedLineConsumer {
public: public:
Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
const std::vector<CharSourceRange> &Ranges) const std::vector<CharSourceRange> &Ranges)
: Style(Style), Lex(Lex), SourceMgr(SourceMgr), : Style(Style), ID(ID), SourceMgr(SourceMgr),
Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())), Whitespaces(SourceMgr, Style,
inputUsesCRLF(SourceMgr.getBufferData(ID))),
Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
Encoding(encoding::detectEncoding(Lex.getBuffer())) { Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
DEBUG(llvm::dbgs() << "File encoding: " DEBUG(llvm::dbgs() << "File encoding: "
<< (Encoding == encoding::Encoding_UTF8 ? "UTF8" << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
: "unknown") : "unknown")
@ -1708,7 +1733,7 @@ public:
tooling::Replacements format() { tooling::Replacements format() {
tooling::Replacements Result; tooling::Replacements Result;
FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding); FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
UnwrappedLineParser Parser(Style, Tokens.lex(), *this); UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
bool StructuralError = Parser.parse(); bool StructuralError = Parser.parse();
@ -1962,7 +1987,7 @@ private:
} }
FormatStyle Style; FormatStyle Style;
Lexer &Lex; FileID ID;
SourceManager &SourceMgr; SourceManager &SourceMgr;
WhitespaceManager Whitespaces; WhitespaceManager Whitespaces;
SmallVector<CharSourceRange, 8> Ranges; SmallVector<CharSourceRange, 8> Ranges;
@ -1977,18 +2002,27 @@ private:
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
SourceManager &SourceMgr, SourceManager &SourceMgr,
std::vector<CharSourceRange> Ranges) { std::vector<CharSourceRange> Ranges) {
if (Style.DisableFormat) { if (Style.DisableFormat)
tooling::Replacements EmptyResult; return tooling::Replacements();
return EmptyResult; return reformat(Style, SourceMgr,
} SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
}
Formatter formatter(Style, Lex, SourceMgr, Ranges); tooling::Replacements reformat(const FormatStyle &Style,
SourceManager &SourceMgr, FileID ID,
std::vector<CharSourceRange> Ranges) {
if (Style.DisableFormat)
return tooling::Replacements();
Formatter formatter(Style, SourceMgr, ID, Ranges);
return formatter.format(); return formatter.format();
} }
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
std::vector<tooling::Range> Ranges, std::vector<tooling::Range> Ranges,
StringRef FileName) { StringRef FileName) {
if (Style.DisableFormat)
return tooling::Replacements();
FileManager Files((FileSystemOptions())); FileManager Files((FileSystemOptions()));
DiagnosticsEngine Diagnostics( DiagnosticsEngine Diagnostics(
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
@ -2001,8 +2035,6 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
SourceMgr.overrideFileContents(Entry, std::move(Buf)); SourceMgr.overrideFileContents(Entry, std::move(Buf));
FileID ID = FileID ID =
SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style));
SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
std::vector<CharSourceRange> CharRanges; std::vector<CharSourceRange> CharRanges;
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
@ -2010,7 +2042,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
} }
return reformat(Style, Lex, SourceMgr, CharRanges); return reformat(Style, SourceMgr, ID, CharRanges);
} }
LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions getFormattingLangOpts(const FormatStyle &Style) {

View File

@ -15,7 +15,6 @@
#include "clang/AST/CommentVisitor.h" #include "clang/AST/CommentVisitor.h"
#include "clang/Format/Format.h" #include "clang/Format/Format.h"
#include "clang/Index/USRGeneration.h" #include "clang/Index/USRGeneration.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
@ -611,12 +610,8 @@ void CommentASTToXMLConverter::formatTextOfDeclaration(
std::vector<CharSourceRange> Ranges( std::vector<CharSourceRange> Ranges(
1, CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length))); 1, CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length)));
ASTContext &Context = DI->CurrentDecl->getASTContext();
const LangOptions &LangOpts = Context.getLangOpts();
Lexer Lex(ID, FormatRewriterContext.Sources.getBuffer(ID),
FormatRewriterContext.Sources, LangOpts);
tooling::Replacements Replace = reformat( tooling::Replacements Replace = reformat(
format::getLLVMStyle(), Lex, FormatRewriterContext.Sources, Ranges); format::getLLVMStyle(), FormatRewriterContext.Sources, ID, Ranges);
applyAllReplacements(Replace, FormatRewriterContext.Rewrite); applyAllReplacements(Replace, FormatRewriterContext.Rewrite);
Declaration = FormatRewriterContext.getRewrittenText(ID); Declaration = FormatRewriterContext.getRewrittenText(ID);
} }

View File

@ -19,7 +19,6 @@
#include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h" #include "clang/Basic/Version.h"
#include "clang/Format/Format.h" #include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Rewrite/Core/Rewriter.h" #include "clang/Rewrite/Core/Rewriter.h"
#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringMap.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
@ -225,9 +224,7 @@ static bool format(StringRef FileName) {
FormatStyle FormatStyle = getStyle( FormatStyle FormatStyle = getStyle(
Style, (FileName == "-") ? AssumeFilename : FileName, FallbackStyle); Style, (FileName == "-") ? AssumeFilename : FileName, FallbackStyle);
Lexer Lex(ID, Sources.getBuffer(ID), Sources, tooling::Replacements Replaces = reformat(FormatStyle, Sources, ID, Ranges);
getFormattingLangOpts(FormatStyle));
tooling::Replacements Replaces = reformat(FormatStyle, Lex, Sources, Ranges);
if (OutputXML) { if (OutputXML) {
llvm::outs() llvm::outs()
<< "<?xml version='1.0'?>\n<replacements xml:space='preserve'>\n"; << "<?xml version='1.0'?>\n<replacements xml:space='preserve'>\n";

View File

@ -330,6 +330,8 @@ TEST_F(FormatTestJS, RegexLiteralSpecialCharacters) {
verifyFormat("var regex = /\\\\/g;"); verifyFormat("var regex = /\\\\/g;");
verifyFormat("var regex = /\\a\\\\/g;"); verifyFormat("var regex = /\\a\\\\/g;");
verifyFormat("var regex = /\a\\//g;"); verifyFormat("var regex = /\a\\//g;");
verifyFormat("var regex = /a\\//;\n"
"var x = 0;");
} }
TEST_F(FormatTestJS, RegexLiteralModifiers) { TEST_F(FormatTestJS, RegexLiteralModifiers) {