[clang-format] Improve support for multiline C# strings

Reviewers: krasimir

Reviewed By: krasimir

Tags: #clang-format

Differential Revision: https://reviews.llvm.org/D73622
This commit is contained in:
Jonathan Coe 2020-01-30 13:22:59 +00:00
parent 8184176efd
commit f9f0919db7
4 changed files with 107 additions and 22 deletions

View File

@ -1760,7 +1760,7 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
LineState &State, bool AllowBreak) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.isStringLiteral()) {
// FIXME: String literal breaking is currently disabled for C#,Java and
// FIXME: String literal breaking is currently disabled for C#, Java and
// JavaScript, as it requires strings to be merged using "+" which we
// don't support.
if (Style.Language == FormatStyle::LK_Java ||

View File

@ -57,6 +57,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
if (Style.Language == FormatStyle::LK_TextProto)
tryParsePythonComment();
tryMergePreviousTokens();
if (Style.isCSharp())
// This needs to come after tokens have been merged so that C#
// string literals are correctly identified.
handleCSharpVerbatimAndInterpolatedStrings();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->Tok.isNot(tok::eof));
@ -181,12 +185,12 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
// Search for verbatim or interpolated string literals @"ABC" or
// $"aaaaa{abc}aaaaa", mark the token as TT_CSharpStringLiteral, and
// prevent splitting of @, $ and ".
// Merging of multiline verbatim strings with embedded '"' is handled in
// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
if (Tokens.size() < 2)
return false;
auto &CSharpStringLiteral = *(Tokens.end() - 2);
// Interpolated strings could contain { } with " characters inside.
// $"{x ?? "null"}"
// should not be split into $"{x ?? ", null, "}" but should be treated as a
@ -236,27 +240,12 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
}
}
// verbatim strings could contain "" which C# sees as an escaped ".
// @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
// merging into a single string literal.
// Look for @"aaaaaa" or $"aaaaaa".
auto &String = *(Tokens.end() - 1);
if (!String->is(tok::string_literal))
return false;
if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
CSharpStringLiteral->TokenText = StringRef(
CSharpStringLiteral->TokenText.begin(),
String->TokenText.end() - CSharpStringLiteral->TokenText.begin());
CSharpStringLiteral->ColumnWidth += String->ColumnWidth;
Tokens.erase(Tokens.end() - 1);
return true;
}
auto &At = *(Tokens.end() - 2);
// Look for @"aaaaaa" or $"aaaaaa".
if (!(At->is(tok::at) || At->TokenText == "$"))
return false;
@ -498,6 +487,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
FormatToken *CSharpStringLiteral = Tokens.back();
if (CSharpStringLiteral->Type != TT_CSharpStringLiteral)
return;
// Deal with multiline strings.
if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
CSharpStringLiteral->TokenText.startswith(R"($@")")))
return;
const char *StrBegin =
Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
const char *Offset = StrBegin;
if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
Offset += 2;
else // CSharpStringLiteral->TokenText.startswith(R"($@")")
Offset += 3;
// Look for a terminating '"' in the current file buffer.
// Make no effort to format code within an interpolated or verbatim string.
for (; Offset != Lex->getBuffer().end(); ++Offset) {
if (Offset[0] == '"') {
// "" within a verbatim string is an escaped double quote: skip it.
if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
++Offset;
else
break;
}
}
// Make no attempt to format code properly if a verbatim string is
// unterminated.
if (Offset == Lex->getBuffer().end())
return;
StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
CSharpStringLiteral->TokenText = LiteralText;
// Adjust width for potentially multiline string literals.
size_t FirstBreak = LiteralText.find('\n');
StringRef FirstLineText = FirstBreak == StringRef::npos
? LiteralText
: LiteralText.substr(0, FirstBreak);
CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
Encoding);
size_t LastBreak = LiteralText.rfind('\n');
if (LastBreak != StringRef::npos) {
CSharpStringLiteral->IsMultiline = true;
unsigned StartColumn = 0; // The template tail spans the entire line.
CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
Style.TabWidth, Encoding);
}
SourceLocation loc = Offset < Lex->getBuffer().end()
? Lex->getSourceLocation(Offset + 1)
: SourceMgr.getLocForEndOfFile(ID);
resetLexer(SourceMgr.getFileOffset(loc));
}
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();

View File

@ -79,6 +79,8 @@ private:
// nested template parts by balancing curly braces.
void handleTemplateStrings();
void handleCSharpVerbatimAndInterpolatedStrings();
void tryParsePythonComment();
bool tryMerge_TMacro();

View File

@ -412,9 +412,9 @@ TEST_F(FormatTestCSharp, CSharpSpaceAfterCStyleCast) {
// Passes C# verbatim string literals whose text contains "" (which C# reads
// as an escaped double quote) to verifyFormat under Google C# style,
// including an interpolated verbatim ($@) literal.
TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
verifyFormat(R"(string str = @"""")", Style);
verifyFormat(R"(string str = @"""Hello world""")", Style);
verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
verifyFormat(R"(string str = @"""";)", Style);
verifyFormat(R"(string str = @"""Hello world""";)", Style);
verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
}
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
@ -425,5 +425,37 @@ TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
}
// Checks that C# verbatim string literals spanning several lines come back
// from format() exactly as they were written. Each case compares the input
// with the formatter's output directly.
TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) {
// Use MS style as Google Style inserts a line break before multiline strings.
// verifyFormat does not understand multiline C# string-literals
// so check the format explicitly.
FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp);
// Interpolated verbatim ($@) string used as a variable initializer; its text
// contains line breaks and doubled braces.
std::string Code = R"(string s1 = $@"some code:
class {className} {{
{className}() {{}}
}}";)";
// The formatter must return the input unchanged.
EXPECT_EQ(Code, format(Code, Style));
// Multiline string in the middle of a function call.
Code = R"(
var x = foo(className, $@"some code:
class {className} {{
{className}() {{}}
}}",
y);)"; // y aligned with `className` arg.
// Again the output must equal the input, including the argument that
// follows the multiline literal.
EXPECT_EQ(Code, format(Code, Style));
// Interpolated string with embedded multiline string.
Code = R"(Console.WriteLine($"{string.Join(@",
", values)}");)";
// The verbatim literal sits inside the braces of a $"..." string here.
EXPECT_EQ(Code, format(Code, Style));
}
} // namespace format
} // end namespace clang