From e71b4cbdd140f059667f84464bd0ac0ebc348387 Mon Sep 17 00:00:00 2001 From: Martin Probst Date: Tue, 17 May 2016 06:29:29 +0000 Subject: [PATCH] clang-format: [JS] fix template string width counting. Summary: Simply looking at the final text greatly simplifies the algorithm and also fixes a reported issue. This requires duplicating the "actual encoding width" logic, but that seems cleaner than the column acrobatics before. Reviewers: djasper, bkramer Subscribers: cfe-commits, klimek Differential Revision: http://reviews.llvm.org/D20208 llvm-svn: 269747 --- clang/lib/Format/Format.cpp | 66 +++++++++---------------- clang/unittests/Format/FormatTestJS.cpp | 9 ++++ 2 files changed, 31 insertions(+), 44 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 66077d8832b9..135ca310eb04 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1010,36 +1010,20 @@ private: return false; unsigned TokenCount = 0; - bool IsMultiline = false; - unsigned EndColumnInFirstLine = - EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { ++TokenCount; - if (I[0]->IsMultiline) - IsMultiline = true; // If there was a preceding template string, this must be the start of a // template string, not the end. if (I[0]->is(TT_TemplateString)) return false; - if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { - // Keep track of the rhs offset of the last token to wrap across lines - - // its the rhs offset of the first line of the template string, used to - // determine its width. - if (I[0]->IsMultiline) - EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; - // If the token has newlines, the token before it (if it exists) is the - // rhs end of the previous line. - if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { - EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; - IsMultiline = true; - } + if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") continue; - } Tokens.resize(Tokens.size() - TokenCount); - Tokens.back()->Type = TT_TemplateString; + FormatToken *TemplateStringToken = Tokens.back(); + TemplateStringToken->Type = TT_TemplateString; const char *EndOffset = EndBacktick->TokenText.data() + 1 + CommentBacktickPos; if (CommentBacktickPos != 0) { @@ -1048,32 +1032,26 @@ private: SourceLocation Loc = EndBacktick->Tok.getLocation(); resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); } - Tokens.back()->TokenText = - StringRef(Tokens.back()->TokenText.data(), - EndOffset - Tokens.back()->TokenText.data()); + StringRef LiteralText = + StringRef(TemplateStringToken->TokenText.data(), + EndOffset - TemplateStringToken->TokenText.data()); + TemplateStringToken->TokenText = LiteralText; - unsigned EndOriginalColumn = EndBacktick->OriginalColumn; - if (EndOriginalColumn == 0) { - SourceLocation Loc = EndBacktick->Tok.getLocation(); - EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); - } - // If the ` is further down within the token (e.g. in a comment). - EndOriginalColumn += CommentBacktickPos; - - if (IsMultiline) { - // ColumnWidth is from backtick to last token in line. - // LastLineColumnWidth is 0 to backtick. - // x = `some content - // until here`; - Tokens.back()->ColumnWidth = - EndColumnInFirstLine - Tokens.back()->OriginalColumn; - // +1 for the ` itself. - Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; - Tokens.back()->IsMultiline = true; - } else { - // Token simply spans from start to end, +1 for the ` itself. - Tokens.back()->ColumnWidth = - EndOriginalColumn - Tokens.back()->OriginalColumn + 1; + size_t FirstBreak = LiteralText.find('\n'); + StringRef FirstLineText = FirstBreak == StringRef::npos + ? LiteralText + : LiteralText.substr(0, FirstBreak); + TemplateStringToken->ColumnWidth = encoding::columnWidthWithTabs( + FirstLineText, TemplateStringToken->OriginalColumn, Style.TabWidth, + Encoding); + size_t LastBreak = LiteralText.rfind('\n'); + if (LastBreak != StringRef::npos) { + TemplateStringToken->IsMultiline = true; + unsigned StartColumn = 0; // The template tail spans the entire line. + TemplateStringToken->LastLineColumnWidth = + encoding::columnWidthWithTabs( + LiteralText.substr(LastBreak + 1, LiteralText.size()), + StartColumn, Style.TabWidth, Encoding); } return true; } diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 72d8948bb7a4..485e8224c320 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -1076,6 +1076,8 @@ TEST_F(FormatTestJS, TemplateStrings) { getGoogleJSStyleWithColumns(34)); // Barely doesn't fit. verifyFormat("var x = `hello ${world}` >= some();", getGoogleJSStyleWithColumns(35)); // Barely fits. + verifyFormat("var x = `hellö ${wörld}` >= söme();", + getGoogleJSStyleWithColumns(35)); // Fits due to UTF-8. verifyFormat("var x = `hello\n" " ${world}` >=\n" " some();", @@ -1097,6 +1099,13 @@ TEST_F(FormatTestJS, TemplateStrings) { getGoogleJSStyleWithColumns(13)); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);"); + // Repro for an obscure width-miscounting issue with template strings. + verifyFormat( + "someLongVariable =\n" + " " + "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;", + "someLongVariable = " + "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;"); // Make sure template strings get a proper ColumnWidth assigned, even if they // are first token in line.