forked from OSchip/llvm-project
[clang-format] Improve support for multiline C# strings
Reviewers: krasimir Reviewed By: krasimir Tags: #clang-format Differential Revision: https://reviews.llvm.org/D73622
This commit is contained in:
parent
8184176efd
commit
f9f0919db7
|
@ -1760,7 +1760,7 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
|
|||
LineState &State, bool AllowBreak) {
|
||||
unsigned StartColumn = State.Column - Current.ColumnWidth;
|
||||
if (Current.isStringLiteral()) {
|
||||
// FIXME: String literal breaking is currently disabled for C#,Java and
|
||||
// FIXME: String literal breaking is currently disabled for C#, Java and
|
||||
// JavaScript, as it requires strings to be merged using "+" which we
|
||||
// don't support.
|
||||
if (Style.Language == FormatStyle::LK_Java ||
|
||||
|
|
|
@ -57,6 +57,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
|
|||
if (Style.Language == FormatStyle::LK_TextProto)
|
||||
tryParsePythonComment();
|
||||
tryMergePreviousTokens();
|
||||
if (Style.isCSharp())
|
||||
// This needs to come after tokens have been merged so that C#
|
||||
// string literals are correctly identified.
|
||||
handleCSharpVerbatimAndInterpolatedStrings();
|
||||
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
|
||||
FirstInLineIndex = Tokens.size() - 1;
|
||||
} while (Tokens.back()->Tok.isNot(tok::eof));
|
||||
|
@ -181,12 +185,12 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
|
|||
// Search for verbatim or interpolated string literals @"ABC" or
|
||||
// $"aaaaa{abc}aaaaa" and mark the token as TT_CSharpStringLiteral, and to
|
||||
// prevent splitting of @, $ and ".
|
||||
// Merging of multiline verbatim strings with embedded '"' is handled in
|
||||
// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
|
||||
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
|
||||
if (Tokens.size() < 2)
|
||||
return false;
|
||||
|
||||
auto &CSharpStringLiteral = *(Tokens.end() - 2);
|
||||
|
||||
// Interpolated strings could contain { } with " characters inside.
|
||||
// $"{x ?? "null"}"
|
||||
// should not be split into $"{x ?? ", null, "}" but should be treated as a
|
||||
|
@ -236,27 +240,12 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
|
|||
}
|
||||
}
|
||||
|
||||
// verbatim strings could contain "" which C# sees as an escaped ".
|
||||
// @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
|
||||
// merging into a single string literal.
|
||||
// Look for @"aaaaaa" or $"aaaaaa".
|
||||
auto &String = *(Tokens.end() - 1);
|
||||
if (!String->is(tok::string_literal))
|
||||
return false;
|
||||
|
||||
if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
|
||||
(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
|
||||
CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
|
||||
CSharpStringLiteral->TokenText = StringRef(
|
||||
CSharpStringLiteral->TokenText.begin(),
|
||||
String->TokenText.end() - CSharpStringLiteral->TokenText.begin());
|
||||
CSharpStringLiteral->ColumnWidth += String->ColumnWidth;
|
||||
Tokens.erase(Tokens.end() - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
auto &At = *(Tokens.end() - 2);
|
||||
|
||||
// Look for @"aaaaaa" or $"aaaaaa".
|
||||
if (!(At->is(tok::at) || At->TokenText == "$"))
|
||||
return false;
|
||||
|
||||
|
@ -498,6 +487,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
|
|||
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
|
||||
}
|
||||
|
||||
void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
|
||||
FormatToken *CSharpStringLiteral = Tokens.back();
|
||||
|
||||
if (CSharpStringLiteral->Type != TT_CSharpStringLiteral)
|
||||
return;
|
||||
|
||||
// Deal with multiline strings.
|
||||
if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
|
||||
CSharpStringLiteral->TokenText.startswith(R"($@")")))
|
||||
return;
|
||||
|
||||
const char *StrBegin =
|
||||
Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
|
||||
const char *Offset = StrBegin;
|
||||
if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
|
||||
Offset += 2;
|
||||
else // CSharpStringLiteral->TokenText.startswith(R"($@")")
|
||||
Offset += 3;
|
||||
|
||||
// Look for a terminating '"' in the current file buffer.
|
||||
// Make no effort to format code within an interpolated or verbatim string.
|
||||
for (; Offset != Lex->getBuffer().end(); ++Offset) {
|
||||
if (Offset[0] == '"') {
|
||||
// "" within a verbatim string is an escaped double quote: skip it.
|
||||
if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
|
||||
++Offset;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Make no attempt to format code properly if a verbatim string is
|
||||
// unterminated.
|
||||
if (Offset == Lex->getBuffer().end())
|
||||
return;
|
||||
|
||||
StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
|
||||
CSharpStringLiteral->TokenText = LiteralText;
|
||||
|
||||
// Adjust width for potentially multiline string literals.
|
||||
size_t FirstBreak = LiteralText.find('\n');
|
||||
StringRef FirstLineText = FirstBreak == StringRef::npos
|
||||
? LiteralText
|
||||
: LiteralText.substr(0, FirstBreak);
|
||||
CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
|
||||
FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
|
||||
Encoding);
|
||||
size_t LastBreak = LiteralText.rfind('\n');
|
||||
if (LastBreak != StringRef::npos) {
|
||||
CSharpStringLiteral->IsMultiline = true;
|
||||
unsigned StartColumn = 0; // The template tail spans the entire line.
|
||||
CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
|
||||
Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
SourceLocation loc = Offset < Lex->getBuffer().end()
|
||||
? Lex->getSourceLocation(Offset + 1)
|
||||
: SourceMgr.getLocForEndOfFile(ID);
|
||||
resetLexer(SourceMgr.getFileOffset(loc));
|
||||
}
|
||||
|
||||
void FormatTokenLexer::handleTemplateStrings() {
|
||||
FormatToken *BacktickToken = Tokens.back();
|
||||
|
||||
|
|
|
@ -79,6 +79,8 @@ private:
|
|||
// nested template parts by balancing curly braces.
|
||||
void handleTemplateStrings();
|
||||
|
||||
// If the last token is a C# string literal starting with @" or $@", extends
// it to the terminating '"' in the file buffer (treating "" as an escaped
// quote), updates its column widths, and restarts the lexer after it.
void handleCSharpVerbatimAndInterpolatedStrings();
|
||||
|
||||
void tryParsePythonComment();
|
||||
|
||||
bool tryMerge_TMacro();
|
||||
|
|
|
@ -412,9 +412,9 @@ TEST_F(FormatTestCSharp, CSharpSpaceAfterCStyleCast) {
|
|||
TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
|
||||
FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
|
||||
|
||||
verifyFormat(R"(string str = @"""")", Style);
|
||||
verifyFormat(R"(string str = @"""Hello world""")", Style);
|
||||
verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
|
||||
verifyFormat(R"(string str = @"""";)", Style);
|
||||
verifyFormat(R"(string str = @"""Hello world""";)", Style);
|
||||
verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
|
||||
}
|
||||
|
||||
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
|
||||
|
@ -425,5 +425,37 @@ TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
|
|||
verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
|
||||
}
|
||||
|
||||
TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) {
|
||||
// Use MS style as Google Style inserts a line break before multiline strings.
|
||||
|
||||
// verifyFormat does not understand multiline C# string-literals
|
||||
// so check the format explicitly.
|
||||
|
||||
FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp);
|
||||
|
||||
std::string Code = R"(string s1 = $@"some code:
|
||||
class {className} {{
|
||||
{className}() {{}}
|
||||
}}";)";
|
||||
|
||||
EXPECT_EQ(Code, format(Code, Style));
|
||||
|
||||
// Multiline string in the middle of a function call.
|
||||
Code = R"(
|
||||
var x = foo(className, $@"some code:
|
||||
class {className} {{
|
||||
{className}() {{}}
|
||||
}}",
|
||||
y);)"; // y aligned with `className` arg.
|
||||
|
||||
EXPECT_EQ(Code, format(Code, Style));
|
||||
|
||||
// Interpolated string with embedded multiline string.
|
||||
Code = R"(Console.WriteLine($"{string.Join(@",
|
||||
", values)}");)";
|
||||
|
||||
EXPECT_EQ(Code, format(Code, Style));
|
||||
}
|
||||
|
||||
} // namespace format
|
||||
} // end namespace clang
|
||||
|
|
Loading…
Reference in New Issue