Handle zero-width and double-width characters in string literals and comments.

Summary:
Count column width instead of the number of code points. This also
includes correct handling of tabs inside string literals and comments (with the
exception of multiline string literals/comments in which tabs appear before
the first escaped newline).

Reviewers: djasper, klimek

Reviewed By: klimek

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D1601

llvm-svn: 190052
This commit is contained in:
Alexander Kornienko 2013-09-05 14:08:34 +00:00
parent 15832288f4
commit ebb43caae2
9 changed files with 189 additions and 76 deletions

View File

@ -149,9 +149,12 @@ struct FormatStyle {
/// Otherwise puts them into the right-most column.
bool AlignEscapedNewlinesLeft;
/// \brief The number of characters to use for indentation.
/// \brief The number of columns to use for indentation.
unsigned IndentWidth;
/// \brief The number of columns used for tab stops.
unsigned TabWidth;
/// \brief The number of characters to use for indentation of constructor
/// initializer lists.
unsigned ConstructorInitializerIndentWidth;

View File

@ -41,6 +41,7 @@ static bool IsBlank(char C) {
static BreakableToken::Split getCommentSplit(StringRef Text,
unsigned ContentStartColumn,
unsigned ColumnLimit,
unsigned TabWidth,
encoding::Encoding Encoding) {
if (ColumnLimit <= ContentStartColumn + 1)
return BreakableToken::Split(StringRef::npos, 0);
@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
unsigned MaxSplitBytes = 0;
for (unsigned NumChars = 0;
NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
MaxSplitBytes +=
NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
unsigned BytesInChar =
encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
NumChars +=
encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
ContentStartColumn, TabWidth, Encoding);
MaxSplitBytes += BytesInChar;
}
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
if (SpaceOffset == StringRef::npos ||
@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
static BreakableToken::Split getStringSplit(StringRef Text,
unsigned ContentStartColumn,
unsigned ColumnLimit,
unsigned TabWidth,
encoding::Encoding Encoding) {
// FIXME: Reduce unit test case.
if (Text.empty())
@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
return BreakableToken::Split(StringRef::npos, 0);
unsigned MaxSplit =
std::min<unsigned>(ColumnLimit - ContentStartColumn,
encoding::getCodePointCount(Text, Encoding) - 1);
encoding::columnWidthWithTabs(Text, ContentStartColumn,
TabWidth, Encoding) -
1);
StringRef::size_type SpaceOffset = 0;
StringRef::size_type SlashOffset = 0;
StringRef::size_type WordStartOffset = 0;
@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
Chars += Advance;
} else {
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
Chars += 1;
Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
ContentStartColumn + Chars,
TabWidth, Encoding);
}
if (Chars > MaxSplit)
@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
return StartColumn + Prefix.size() + Postfix.size() +
encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
encoding::columnWidthWithTabs(Line.substr(Offset, Length),
StartColumn + Prefix.size(),
Style.TabWidth, Encoding);
}
BreakableSingleLineToken::BreakableSingleLineToken(
const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
: BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
Prefix(Prefix), Postfix(Postfix) {
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style)
: BreakableToken(Tok, InPPDirective, Encoding, Style),
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
Line = Tok.TokenText.substr(
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken(
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
unsigned StartColumn,
bool InPPDirective,
encoding::Encoding Encoding)
encoding::Encoding Encoding,
const FormatStyle &Style)
: BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
Encoding) {}
Encoding, Style) {}
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const {
return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
Encoding);
Style.TabWidth, Encoding);
}
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
unsigned StartColumn,
bool InPPDirective,
encoding::Encoding Encoding)
encoding::Encoding Encoding,
const FormatStyle &Style)
: BreakableSingleLineToken(Token, StartColumn,
getLineCommentPrefix(Token.TokenText), "",
InPPDirective, Encoding) {
InPPDirective, Encoding, Style) {
OriginalPrefix = Prefix;
if (Token.TokenText.size() > Prefix.size() &&
isAlphanumeric(Token.TokenText[Prefix.size()])) {
@ -195,7 +211,7 @@ BreakableToken::Split
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const {
return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
ColumnLimit, Encoding);
ColumnLimit, Style.TabWidth, Encoding);
}
void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
}
BreakableBlockComment::BreakableBlockComment(
const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
encoding::Encoding Encoding)
: BreakableToken(Token, InPPDirective, Encoding) {
encoding::Encoding Encoding, const FormatStyle &Style)
: BreakableToken(Token, InPPDirective, Encoding, Style) {
StringRef TokenText(Token.TokenText);
assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment(
StartOfLineColumn.resize(Lines.size());
StartOfLineColumn[0] = StartColumn + 2;
for (size_t i = 1; i < Lines.size(); ++i)
adjustWhitespace(Style, i, IndentDelta);
adjustWhitespace(i, IndentDelta);
Decoration = "* ";
if (Lines.size() == 1 && !FirstInLine) {
@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment(
});
}
void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
unsigned LineIndex,
void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
int IndentDelta) {
// When in a preprocessor directive, the trailing backslash in a block comment
// is not needed, but can serve a purpose of uniformity with necessary escaped
@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
if (StartOfLine == StringRef::npos)
StartOfLine = Lines[LineIndex].size();
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
// Adjust Lines to only contain relevant text.
Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
// if leading tabs are intermixed with spaces, that is not a high priority.
// Adjust the start column uniformly across all lines.
StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
StartOfLineColumn[LineIndex] =
std::max<int>(0, Whitespace.size() + IndentDelta);
}
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
unsigned BreakableBlockComment::getLineLengthAfterSplit(
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
return getContentStartColumn(LineIndex, Offset) +
encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
Encoding) +
unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
return ContentStartColumn +
encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
ContentStartColumn, Style.TabWidth,
Encoding) +
// The last line gets a "*/" postfix.
(LineIndex + 1 == Lines.size() ? 2 : 0);
}
@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const {
return getCommentSplit(Lines[LineIndex].substr(TailOffset),
getContentStartColumn(LineIndex, TailOffset),
ColumnLimit, Encoding);
ColumnLimit, Style.TabWidth, Encoding);
}
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

View File

@ -67,12 +67,14 @@ public:
protected:
BreakableToken(const FormatToken &Tok, bool InPPDirective,
encoding::Encoding Encoding)
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
encoding::Encoding Encoding, const FormatStyle &Style)
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
Style(Style) {}
const FormatToken &Tok;
const bool InPPDirective;
const encoding::Encoding Encoding;
const FormatStyle &Style;
};
/// \brief Base class for single line tokens that can be broken.
@ -88,7 +90,8 @@ public:
protected:
BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
StringRef Prefix, StringRef Postfix,
bool InPPDirective, encoding::Encoding Encoding);
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
// The column in which the token starts.
unsigned StartColumn;
@ -107,7 +110,8 @@ public:
/// \p StartColumn specifies the column in which the token will start
/// after formatting.
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
bool InPPDirective, encoding::Encoding Encoding);
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const;
@ -122,7 +126,8 @@ public:
/// \p StartColumn specifies the column in which the comment will start
/// after formatting.
BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
bool InPPDirective, encoding::Encoding Encoding);
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const;
@ -144,10 +149,10 @@ public:
/// after formatting, while \p OriginalStartColumn specifies in which
/// column the comment started before formatting.
/// If the comment starts a line after formatting, set \p FirstInLine to true.
BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
unsigned StartColumn, unsigned OriginaStartColumn,
bool FirstInLine, bool InPPDirective,
encoding::Encoding Encoding);
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
unsigned OriginaStartColumn, bool FirstInLine,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
virtual unsigned getLineCount() const;
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@ -172,8 +177,7 @@ private:
// Sets StartOfLineColumn to the intended column in which the text at
// Lines[LineIndex] starts (note that the decoration, if present, is not
// considered part of the text).
void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
int IndentDelta);
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
// Returns the column at which the text in line LineIndex starts, when broken
// at TailOffset. Note that the decoration (if present) is not considered part

View File

@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
State.Stack[i].BreakBeforeParameter = true;
unsigned ColumnsUsed =
State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
// We can only affect layout of the first and the last line, so the penalty
// for all other lines is constant, and we ignore it.
State.Column = Current.CodePointsInLastLine;
State.Column = Current.LastLineColumnWidth;
if (ColumnsUsed > getColumnLimit(State))
return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
return 0;
Token.reset(new BreakableStringLiteral(
Current, StartColumn, State.Line->InPPDirective, Encoding));
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
unsigned OriginalStartColumn =
SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
1;
Token.reset(new BreakableBlockComment(
Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
State.Line->InPPDirective, Encoding));
Current, StartColumn, OriginalStartColumn, !Current.Previous,
State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_LineComment &&
(Current.Previous == NULL ||
Current.Previous->Type != TT_ImplicitStringLiteral)) {
@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
if (Current.isMultiline()) {
State.Column = StartColumn + Current.CodePointsInFirstLine;
State.Column = StartColumn + Current.FirstLineColumnWidth;
return 0;
}
Token.reset(new BreakableLineComment(Current, StartColumn,
State.Line->InPPDirective, Encoding));
Token.reset(new BreakableLineComment(
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
} else {
return 0;
}

View File

@ -18,6 +18,7 @@
#include "clang/Basic/LLVM.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Unicode.h"
namespace clang {
namespace format {
@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
}
}
/// \brief Computes how many terminal columns \p Text occupies when it is
/// interpreted in the given \p Encoding.
///
/// For UTF-8 input the width is taken from
/// llvm::sys::unicode::columnWidthUTF8(). If that call reports a negative
/// value, or if the encoding is not UTF-8, the byte length of \p Text is
/// returned instead.
inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
  // Anything that is not UTF-8 is measured in bytes.
  if (Encoding != Encoding_UTF8)
    return Text.size();

  const int UnicodeWidth = llvm::sys::unicode::columnWidthUTF8(Text);
  // A negative result means the width could not be determined; fall back to
  // the byte length in that case as well.
  if (UnicodeWidth < 0)
    return Text.size();
  return UnicodeWidth;
}
/// \brief Returns the number of columns required to display the \p Text,
/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
/// text is assumed to use the specified \p Encoding.
///
/// The text is processed tab by tab: the width of each tab-free piece is
/// measured with columnWidth(), and every tab then advances the running
/// position (\p StartColumn plus the width accumulated so far) to the next
/// multiple of \p TabWidth. A \p TabWidth of zero makes tabs contribute no
/// columns instead of performing a modulo by zero.
inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
                                    unsigned TabWidth, Encoding Encoding) {
  unsigned TotalWidth = 0;
  StringRef Tail = Text;
  for (;;) {
    StringRef::size_type TabPos = Tail.find('\t');
    // No more tabs: the remainder is measured as-is and we are done.
    if (TabPos == StringRef::npos)
      return TotalWidth + columnWidth(Tail, Encoding);
    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
    // TabWidth is a user-configurable style option; guard against zero so
    // the modulo below is always well-defined.
    if (TabWidth)
      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
    Tail = Tail.substr(TabPos + 1);
  }
}
/// \brief Gets the number of bytes in a sequence representing a single
/// codepoint and starting with FirstChar in the specified Encoding.
inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {

View File

@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
IO.mapOptional("Standard", Style.Standard);
IO.mapOptional("IndentWidth", Style.IndentWidth);
IO.mapOptional("TabWidth", Style.TabWidth);
IO.mapOptional("UseTab", Style.UseTab);
IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
IO.mapOptional("IndentFunctionDeclarationAfterType",
@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.IndentCaseLabels = false;
LLVMStyle.IndentFunctionDeclarationAfterType = false;
LLVMStyle.IndentWidth = 2;
LLVMStyle.TabWidth = 8;
LLVMStyle.MaxEmptyLinesToKeep = 1;
LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
LLVMStyle.ObjCSpaceBeforeProtocolList = true;
@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() {
GoogleStyle.IndentCaseLabels = true;
GoogleStyle.IndentFunctionDeclarationAfterType = true;
GoogleStyle.IndentWidth = 2;
GoogleStyle.TabWidth = 8;
GoogleStyle.MaxEmptyLinesToKeep = 1;
GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
GoogleStyle.ObjCSpaceBeforeProtocolList = false;
@ -629,7 +632,7 @@ private:
++Column;
break;
case '\t':
Column += Style.IndentWidth - Column % Style.IndentWidth;
Column += Style.TabWidth - Column % Style.TabWidth;
break;
default:
++Column;
@ -681,10 +684,12 @@ private:
StringRef Text = FormatTok->TokenText;
size_t FirstNewlinePos = Text.find('\n');
if (FirstNewlinePos != StringRef::npos) {
FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
Text.substr(0, FirstNewlinePos), Encoding);
FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
Text.substr(Text.find_last_of('\n') + 1), Encoding);
// FIXME: Handle embedded tabs.
FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
Encoding);
}
}
// FIXME: Add the CodePointCount to Column.

View File

@ -83,7 +83,7 @@ class AnnotatedLine;
struct FormatToken {
FormatToken()
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
CanBreakBefore(false), ClosesTemplateDeclaration(false),
@ -120,15 +120,15 @@ struct FormatToken {
/// \brief Contains the width in columns of the first line of a
/// multi-line string literal or comment. Zero if there's no newline in the
/// token.
unsigned CodePointsInFirstLine;
unsigned FirstLineColumnWidth;
/// \brief Contains the width in columns of the last line of a
/// multi-line string literal or comment. Can be zero for line comments.
unsigned CodePointsInLastLine;
unsigned LastLineColumnWidth;
/// \brief Returns \c true if the token text contains newlines (escaped or
/// not).
bool isMultiline() const { return CodePointsInFirstLine != 0; }
bool isMultiline() const { return FirstLineColumnWidth != 0; }
/// \brief Indicates that this is the first token.
bool IsFirst;

View File

@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) {
if (!Style.UseTab)
return std::string(Spaces, ' ');
return std::string(Spaces / Style.IndentWidth, '\t') +
std::string(Spaces % Style.IndentWidth, ' ');
return std::string(Spaces / Style.TabWidth, '\t') +
std::string(Spaces % Style.TabWidth, ' ');
}
} // namespace format

View File

@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
"}",
21, 0, Tab));
Tab.TabWidth = 4;
Tab.IndentWidth = 8;
verifyFormat("class TabWidth4Indent8 {\n"
"\t\tvoid f() {\n"
"\t\t\t\tsomeFunction(parameter1,\n"
"\t\t\t\t\t\t\t parameter2);\n"
"\t\t}\n"
"};",
Tab);
Tab.TabWidth = 4;
Tab.IndentWidth = 4;
verifyFormat("class TabWidth4Indent4 {\n"
"\tvoid f() {\n"
"\t\tsomeFunction(parameter1,\n"
"\t\t\t\t\t parameter2);\n"
"\t}\n"
"};",
Tab);
Tab.TabWidth = 8;
Tab.IndentWidth = 4;
verifyFormat("class TabWidth8Indent4 {\n"
" void f() {\n"
"\tsomeFunction(parameter1,\n"
"\t\t parameter2);\n"
" }\n"
"};",
Tab);
// FIXME: To correctly count mixed whitespace we need to
// also correctly count mixed whitespace in front of the comment.
//
// Tab.TabWidth = 8;
// Tab.IndentWidth = 8;
// EXPECT_EQ("/*\n"
// "\t a\t\tcomment\n"
// "\t in multiple lines\n"
@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
verifyFormat("\"Однажды в студёную зимнюю пору...\"",
getLLVMStyleWithColumns(35));
verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
getLLVMStyleWithColumns(21));
getLLVMStyleWithColumns(31));
verifyFormat("// Однажды в студёную зимнюю пору...",
getLLVMStyleWithColumns(36));
verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
getLLVMStyleWithColumns(22));
getLLVMStyleWithColumns(32));
verifyFormat("/* Однажды в студёную зимнюю пору... */",
getLLVMStyleWithColumns(39));
verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
getLLVMStyleWithColumns(25));
getLLVMStyleWithColumns(35));
}
TEST_F(FormatTest, SplitsUTF8Strings) {
@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
"\"пору,\"",
format("\"Однажды, в студёную зимнюю пору,\"",
getLLVMStyleWithColumns(13)));
EXPECT_EQ("\"一 二 三 四 \"\n"
"\"五 六 七 八 \"\n"
"\"九 十\"",
format("\"一 二 三 四 五 六 七 八 九 十\"",
getLLVMStyleWithColumns(10)));
EXPECT_EQ("\"一 二 三 \"\n"
"\"四 五六 \"\n"
"\"七 八 九 \"\n"
"\"\"",
format("\"一 二 三 四 五六 七 八 九 十\"",
getLLVMStyleWithColumns(11)));
EXPECT_EQ("\"\t\"\n"
"\"\t\"\n"
"\"四 五\t\"\n"
"\"\t\"\n"
"\"八九十\tqq\"",
format("\"\t\t三 四 五\t\t七 八九十\tqq\"",
getLLVMStyleWithColumns(11)));
}
// Formats a declaration whose string literal contains CJK characters and is
// continued with an escaped newline, using a 30-column limit. The expected
// output places the literal on its own line after the '='.
TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
EXPECT_EQ("const char *sssss =\n"
" \"一二三四五六七八\\\n"
" 九 十\";",
format("const char *sssss = \"一二三四五六七八\\\n"
" 九 十\";",
getLLVMStyleWithColumns(30)));
}
TEST_F(FormatTest, SplitsUTF8LineComments) {
@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
getLLVMStyleWithColumns(13)));
EXPECT_EQ("// 一二三\n"
"// 四五六七\n"
"// 八\n"
"// ",
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(6)));
"// 八\n"
"// ",
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(9)));
}
TEST_F(FormatTest, SplitsUTF8BlockComments) {
@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
format("/* Гляжу, поднимается медленно в гору\n"
" * Лошадка, везущая хворосту воз. */",
getLLVMStyleWithColumns(13)));
EXPECT_EQ("/* 一二三\n"
" * 四五六七\n"
" * 八\n"
" * \n"
" */",
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(6)));
EXPECT_EQ(
"/* 一二三\n"
" * 四五六七\n"
" * \n"
" * 十 */",
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9)));
EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
" * 𝕓𝕪𝕥𝕖\n"
" * 𝖀𝕿𝕱-𝟠 */",
format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
}
#endif // _MSC_VER
TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
FormatStyle Style = getLLVMStyle();
@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
Style);
}
#endif
TEST_F(FormatTest, FormatsWithWebKitStyle) {
FormatStyle Style = getWebKitStyle();