forked from OSchip/llvm-project
clang-format: [JS] Support regex literals with trailing escaped slash.
Before: var regex = / a\//; int i; After: var regex = /a\//; int i; This required pushing the Lexer into its wrapper class and generating a new one in this specific case. Otherwise, the sequence gets lexed as a //-comment. This is hacky, but I don't know a better way (short of supporting regex literals in the Lexer). Pushing the Lexer down seems to make all the call sites simpler. llvm-svn: 217444
This commit is contained in:
parent
7fc29546f9
commit
23376259c0
|
@ -487,14 +487,21 @@ std::string configurationAsText(const FormatStyle &Style);
|
||||||
/// \brief Reformats the given \p Ranges in the token stream coming out of
|
/// \brief Reformats the given \p Ranges in the token stream coming out of
|
||||||
/// \c Lex.
|
/// \c Lex.
|
||||||
///
|
///
|
||||||
|
/// DEPRECATED: Do not use.
|
||||||
|
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
|
||||||
|
SourceManager &SourceMgr,
|
||||||
|
std::vector<CharSourceRange> Ranges);
|
||||||
|
|
||||||
|
/// \brief Reformats the given \p Ranges in the file \p ID.
|
||||||
|
///
|
||||||
/// Each range is extended on either end to its next bigger logic unit, i.e.
|
/// Each range is extended on either end to its next bigger logic unit, i.e.
|
||||||
/// everything that might influence its formatting or might be influenced by its
|
/// everything that might influence its formatting or might be influenced by its
|
||||||
/// formatting.
|
/// formatting.
|
||||||
///
|
///
|
||||||
/// Returns the \c Replacements necessary to make all \p Ranges comply with
|
/// Returns the \c Replacements necessary to make all \p Ranges comply with
|
||||||
/// \p Style.
|
/// \p Style.
|
||||||
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
|
tooling::Replacements reformat(const FormatStyle &Style,
|
||||||
SourceManager &SourceMgr,
|
SourceManager &SourceMgr, FileID ID,
|
||||||
std::vector<CharSourceRange> Ranges);
|
std::vector<CharSourceRange> Ranges);
|
||||||
|
|
||||||
/// \brief Reformats the given \p Ranges in \p Code.
|
/// \brief Reformats the given \p Ranges in \p Code.
|
||||||
|
|
|
@ -1273,13 +1273,16 @@ private:
|
||||||
|
|
||||||
class FormatTokenLexer {
|
class FormatTokenLexer {
|
||||||
public:
|
public:
|
||||||
FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
|
FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
|
||||||
encoding::Encoding Encoding)
|
encoding::Encoding Encoding)
|
||||||
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
|
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
|
||||||
Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr),
|
Column(0), TrailingWhitespace(0),
|
||||||
Style(Style), IdentTable(getFormattingLangOpts(Style)),
|
SourceMgr(SourceMgr), ID(ID), Style(Style),
|
||||||
Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) {
|
IdentTable(getFormattingLangOpts(Style)), Encoding(Encoding),
|
||||||
Lex.SetKeepWhitespaceMode(true);
|
FirstInLineIndex(0), FormattingDisabled(false) {
|
||||||
|
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
|
||||||
|
getFormattingLangOpts(Style)));
|
||||||
|
Lex->SetKeepWhitespaceMode(true);
|
||||||
|
|
||||||
for (const std::string &ForEachMacro : Style.ForEachMacros)
|
for (const std::string &ForEachMacro : Style.ForEachMacros)
|
||||||
ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
|
ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
|
||||||
|
@ -1308,10 +1311,10 @@ private:
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (Style.Language == FormatStyle::LK_JavaScript) {
|
if (Style.Language == FormatStyle::LK_JavaScript) {
|
||||||
if (tryMergeEscapeSequence())
|
|
||||||
return;
|
|
||||||
if (tryMergeJSRegexLiteral())
|
if (tryMergeJSRegexLiteral())
|
||||||
return;
|
return;
|
||||||
|
if (tryMergeEscapeSequence())
|
||||||
|
return;
|
||||||
|
|
||||||
static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
|
static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
|
||||||
static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
|
static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
|
||||||
|
@ -1376,9 +1379,18 @@ private:
|
||||||
// "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
|
// "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
|
||||||
// a division.
|
// a division.
|
||||||
bool tryMergeJSRegexLiteral() {
|
bool tryMergeJSRegexLiteral() {
|
||||||
if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) ||
|
if (Tokens.size() < 2)
|
||||||
(Tokens[Tokens.size() - 2]->is(tok::unknown) &&
|
return false;
|
||||||
Tokens[Tokens.size() - 2]->TokenText == "\\"))
|
// If a regex literal ends in "\//", this gets represented by an unknown
|
||||||
|
// token "\" and a comment.
|
||||||
|
bool MightEndWithEscapedSlash =
|
||||||
|
Tokens.back()->is(tok::comment) &&
|
||||||
|
Tokens.back()->TokenText.startswith("//") &&
|
||||||
|
Tokens[Tokens.size() - 2]->TokenText == "\\";
|
||||||
|
if (!MightEndWithEscapedSlash &&
|
||||||
|
(Tokens.back()->isNot(tok::slash) ||
|
||||||
|
(Tokens[Tokens.size() - 2]->is(tok::unknown) &&
|
||||||
|
Tokens[Tokens.size() - 2]->TokenText == "\\")))
|
||||||
return false;
|
return false;
|
||||||
unsigned TokenCount = 0;
|
unsigned TokenCount = 0;
|
||||||
unsigned LastColumn = Tokens.back()->OriginalColumn;
|
unsigned LastColumn = Tokens.back()->OriginalColumn;
|
||||||
|
@ -1389,6 +1401,17 @@ private:
|
||||||
tok::exclaim, tok::l_square, tok::colon, tok::comma,
|
tok::exclaim, tok::l_square, tok::colon, tok::comma,
|
||||||
tok::question, tok::kw_return) ||
|
tok::question, tok::kw_return) ||
|
||||||
I[1]->isBinaryOperator())) {
|
I[1]->isBinaryOperator())) {
|
||||||
|
if (MightEndWithEscapedSlash) {
|
||||||
|
StringRef Buffer = SourceMgr.getBufferData(ID);
|
||||||
|
// This regex literal ends in '\//'. Skip past the '//' of the last
|
||||||
|
// token and re-start lexing from there.
|
||||||
|
int offset =
|
||||||
|
SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 2;
|
||||||
|
Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
|
||||||
|
getFormattingLangOpts(Style), Buffer.begin(),
|
||||||
|
Buffer.begin() + offset, Buffer.end()));
|
||||||
|
Lex->SetKeepWhitespaceMode(true);
|
||||||
|
}
|
||||||
Tokens.resize(Tokens.size() - TokenCount);
|
Tokens.resize(Tokens.size() - TokenCount);
|
||||||
Tokens.back()->Tok.setKind(tok::unknown);
|
Tokens.back()->Tok.setKind(tok::unknown);
|
||||||
Tokens.back()->Type = TT_RegexLiteral;
|
Tokens.back()->Type = TT_RegexLiteral;
|
||||||
|
@ -1641,8 +1664,9 @@ private:
|
||||||
bool GreaterStashed;
|
bool GreaterStashed;
|
||||||
unsigned Column;
|
unsigned Column;
|
||||||
unsigned TrailingWhitespace;
|
unsigned TrailingWhitespace;
|
||||||
Lexer &Lex;
|
std::unique_ptr<Lexer> Lex;
|
||||||
SourceManager &SourceMgr;
|
SourceManager &SourceMgr;
|
||||||
|
FileID ID;
|
||||||
FormatStyle &Style;
|
FormatStyle &Style;
|
||||||
IdentifierTable IdentTable;
|
IdentifierTable IdentTable;
|
||||||
encoding::Encoding Encoding;
|
encoding::Encoding Encoding;
|
||||||
|
@ -1655,7 +1679,7 @@ private:
|
||||||
bool FormattingDisabled;
|
bool FormattingDisabled;
|
||||||
|
|
||||||
void readRawToken(FormatToken &Tok) {
|
void readRawToken(FormatToken &Tok) {
|
||||||
Lex.LexFromRawLexer(Tok.Tok);
|
Lex->LexFromRawLexer(Tok.Tok);
|
||||||
Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
|
Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
|
||||||
Tok.Tok.getLength());
|
Tok.Tok.getLength());
|
||||||
// For formatting, treat unterminated string literals like normal string
|
// For formatting, treat unterminated string literals like normal string
|
||||||
|
@ -1692,12 +1716,13 @@ static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
|
||||||
|
|
||||||
class Formatter : public UnwrappedLineConsumer {
|
class Formatter : public UnwrappedLineConsumer {
|
||||||
public:
|
public:
|
||||||
Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
|
Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
|
||||||
const std::vector<CharSourceRange> &Ranges)
|
const std::vector<CharSourceRange> &Ranges)
|
||||||
: Style(Style), Lex(Lex), SourceMgr(SourceMgr),
|
: Style(Style), ID(ID), SourceMgr(SourceMgr),
|
||||||
Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
|
Whitespaces(SourceMgr, Style,
|
||||||
|
inputUsesCRLF(SourceMgr.getBufferData(ID))),
|
||||||
Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
|
Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
|
||||||
Encoding(encoding::detectEncoding(Lex.getBuffer())) {
|
Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
|
||||||
DEBUG(llvm::dbgs() << "File encoding: "
|
DEBUG(llvm::dbgs() << "File encoding: "
|
||||||
<< (Encoding == encoding::Encoding_UTF8 ? "UTF8"
|
<< (Encoding == encoding::Encoding_UTF8 ? "UTF8"
|
||||||
: "unknown")
|
: "unknown")
|
||||||
|
@ -1708,7 +1733,7 @@ public:
|
||||||
|
|
||||||
tooling::Replacements format() {
|
tooling::Replacements format() {
|
||||||
tooling::Replacements Result;
|
tooling::Replacements Result;
|
||||||
FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
|
FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
|
||||||
|
|
||||||
UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
|
UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
|
||||||
bool StructuralError = Parser.parse();
|
bool StructuralError = Parser.parse();
|
||||||
|
@ -1962,7 +1987,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
FormatStyle Style;
|
FormatStyle Style;
|
||||||
Lexer &Lex;
|
FileID ID;
|
||||||
SourceManager &SourceMgr;
|
SourceManager &SourceMgr;
|
||||||
WhitespaceManager Whitespaces;
|
WhitespaceManager Whitespaces;
|
||||||
SmallVector<CharSourceRange, 8> Ranges;
|
SmallVector<CharSourceRange, 8> Ranges;
|
||||||
|
@ -1977,18 +2002,27 @@ private:
|
||||||
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
|
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
|
||||||
SourceManager &SourceMgr,
|
SourceManager &SourceMgr,
|
||||||
std::vector<CharSourceRange> Ranges) {
|
std::vector<CharSourceRange> Ranges) {
|
||||||
if (Style.DisableFormat) {
|
if (Style.DisableFormat)
|
||||||
tooling::Replacements EmptyResult;
|
return tooling::Replacements();
|
||||||
return EmptyResult;
|
return reformat(Style, SourceMgr,
|
||||||
}
|
SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
|
||||||
|
}
|
||||||
|
|
||||||
Formatter formatter(Style, Lex, SourceMgr, Ranges);
|
tooling::Replacements reformat(const FormatStyle &Style,
|
||||||
|
SourceManager &SourceMgr, FileID ID,
|
||||||
|
std::vector<CharSourceRange> Ranges) {
|
||||||
|
if (Style.DisableFormat)
|
||||||
|
return tooling::Replacements();
|
||||||
|
Formatter formatter(Style, SourceMgr, ID, Ranges);
|
||||||
return formatter.format();
|
return formatter.format();
|
||||||
}
|
}
|
||||||
|
|
||||||
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
|
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
|
||||||
std::vector<tooling::Range> Ranges,
|
std::vector<tooling::Range> Ranges,
|
||||||
StringRef FileName) {
|
StringRef FileName) {
|
||||||
|
if (Style.DisableFormat)
|
||||||
|
return tooling::Replacements();
|
||||||
|
|
||||||
FileManager Files((FileSystemOptions()));
|
FileManager Files((FileSystemOptions()));
|
||||||
DiagnosticsEngine Diagnostics(
|
DiagnosticsEngine Diagnostics(
|
||||||
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
|
IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
|
||||||
|
@ -2001,8 +2035,6 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
|
||||||
SourceMgr.overrideFileContents(Entry, std::move(Buf));
|
SourceMgr.overrideFileContents(Entry, std::move(Buf));
|
||||||
FileID ID =
|
FileID ID =
|
||||||
SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
|
SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
|
||||||
Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
|
|
||||||
getFormattingLangOpts(Style));
|
|
||||||
SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
|
SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
|
||||||
std::vector<CharSourceRange> CharRanges;
|
std::vector<CharSourceRange> CharRanges;
|
||||||
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
|
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
|
||||||
|
@ -2010,7 +2042,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
|
||||||
SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
|
SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
|
||||||
CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
|
CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
|
||||||
}
|
}
|
||||||
return reformat(Style, Lex, SourceMgr, CharRanges);
|
return reformat(Style, SourceMgr, ID, CharRanges);
|
||||||
}
|
}
|
||||||
|
|
||||||
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
|
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
#include "clang/AST/CommentVisitor.h"
|
#include "clang/AST/CommentVisitor.h"
|
||||||
#include "clang/Format/Format.h"
|
#include "clang/Format/Format.h"
|
||||||
#include "clang/Index/USRGeneration.h"
|
#include "clang/Index/USRGeneration.h"
|
||||||
#include "clang/Lex/Lexer.h"
|
|
||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/ADT/TinyPtrVector.h"
|
#include "llvm/ADT/TinyPtrVector.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
@ -611,12 +610,8 @@ void CommentASTToXMLConverter::formatTextOfDeclaration(
|
||||||
|
|
||||||
std::vector<CharSourceRange> Ranges(
|
std::vector<CharSourceRange> Ranges(
|
||||||
1, CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length)));
|
1, CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length)));
|
||||||
ASTContext &Context = DI->CurrentDecl->getASTContext();
|
|
||||||
const LangOptions &LangOpts = Context.getLangOpts();
|
|
||||||
Lexer Lex(ID, FormatRewriterContext.Sources.getBuffer(ID),
|
|
||||||
FormatRewriterContext.Sources, LangOpts);
|
|
||||||
tooling::Replacements Replace = reformat(
|
tooling::Replacements Replace = reformat(
|
||||||
format::getLLVMStyle(), Lex, FormatRewriterContext.Sources, Ranges);
|
format::getLLVMStyle(), FormatRewriterContext.Sources, ID, Ranges);
|
||||||
applyAllReplacements(Replace, FormatRewriterContext.Rewrite);
|
applyAllReplacements(Replace, FormatRewriterContext.Rewrite);
|
||||||
Declaration = FormatRewriterContext.getRewrittenText(ID);
|
Declaration = FormatRewriterContext.getRewrittenText(ID);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
#include "clang/Basic/SourceManager.h"
|
#include "clang/Basic/SourceManager.h"
|
||||||
#include "clang/Basic/Version.h"
|
#include "clang/Basic/Version.h"
|
||||||
#include "clang/Format/Format.h"
|
#include "clang/Format/Format.h"
|
||||||
#include "clang/Lex/Lexer.h"
|
|
||||||
#include "clang/Rewrite/Core/Rewriter.h"
|
#include "clang/Rewrite/Core/Rewriter.h"
|
||||||
#include "llvm/ADT/StringMap.h"
|
#include "llvm/ADT/StringMap.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
@ -225,9 +224,7 @@ static bool format(StringRef FileName) {
|
||||||
|
|
||||||
FormatStyle FormatStyle = getStyle(
|
FormatStyle FormatStyle = getStyle(
|
||||||
Style, (FileName == "-") ? AssumeFilename : FileName, FallbackStyle);
|
Style, (FileName == "-") ? AssumeFilename : FileName, FallbackStyle);
|
||||||
Lexer Lex(ID, Sources.getBuffer(ID), Sources,
|
tooling::Replacements Replaces = reformat(FormatStyle, Sources, ID, Ranges);
|
||||||
getFormattingLangOpts(FormatStyle));
|
|
||||||
tooling::Replacements Replaces = reformat(FormatStyle, Lex, Sources, Ranges);
|
|
||||||
if (OutputXML) {
|
if (OutputXML) {
|
||||||
llvm::outs()
|
llvm::outs()
|
||||||
<< "<?xml version='1.0'?>\n<replacements xml:space='preserve'>\n";
|
<< "<?xml version='1.0'?>\n<replacements xml:space='preserve'>\n";
|
||||||
|
|
|
@ -330,6 +330,8 @@ TEST_F(FormatTestJS, RegexLiteralSpecialCharacters) {
|
||||||
verifyFormat("var regex = /\\\\/g;");
|
verifyFormat("var regex = /\\\\/g;");
|
||||||
verifyFormat("var regex = /\\a\\\\/g;");
|
verifyFormat("var regex = /\\a\\\\/g;");
|
||||||
verifyFormat("var regex = /\a\\//g;");
|
verifyFormat("var regex = /\a\\//g;");
|
||||||
|
verifyFormat("var regex = /a\\//;\n"
|
||||||
|
"var x = 0;");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(FormatTestJS, RegexLiteralModifiers) {
|
TEST_F(FormatTestJS, RegexLiteralModifiers) {
|
||||||
|
|
Loading…
Reference in New Issue