forked from OSchip/llvm-project
Handle zero-width and double-width characters in string literals and comments.
Summary: Count column width instead of the number of code points. This also includes correct handling of tabs inside string literals and comments (with the exception of multiline string literals/comments in which tabs are present before the first escaped newline). Reviewers: djasper, klimek Reviewed By: klimek CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1601 llvm-svn: 190052
This commit is contained in:
parent
15832288f4
commit
ebb43caae2
|
@ -149,9 +149,12 @@ struct FormatStyle {
|
|||
/// Otherwise puts them into the right-most column.
|
||||
bool AlignEscapedNewlinesLeft;
|
||||
|
||||
/// \brief The number of characters to use for indentation.
|
||||
/// \brief The number of columns to use for indentation.
|
||||
unsigned IndentWidth;
|
||||
|
||||
/// \brief The number of columns used for tab stops.
|
||||
unsigned TabWidth;
|
||||
|
||||
/// \brief The number of characters to use for indentation of constructor
|
||||
/// initializer lists.
|
||||
unsigned ConstructorInitializerIndentWidth;
|
||||
|
|
|
@ -41,6 +41,7 @@ static bool IsBlank(char C) {
|
|||
static BreakableToken::Split getCommentSplit(StringRef Text,
|
||||
unsigned ContentStartColumn,
|
||||
unsigned ColumnLimit,
|
||||
unsigned TabWidth,
|
||||
encoding::Encoding Encoding) {
|
||||
if (ColumnLimit <= ContentStartColumn + 1)
|
||||
return BreakableToken::Split(StringRef::npos, 0);
|
||||
|
@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
|
|||
unsigned MaxSplitBytes = 0;
|
||||
|
||||
for (unsigned NumChars = 0;
|
||||
NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
|
||||
MaxSplitBytes +=
|
||||
NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
|
||||
unsigned BytesInChar =
|
||||
encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
|
||||
NumChars +=
|
||||
encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
|
||||
ContentStartColumn, TabWidth, Encoding);
|
||||
MaxSplitBytes += BytesInChar;
|
||||
}
|
||||
|
||||
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
|
||||
if (SpaceOffset == StringRef::npos ||
|
||||
|
@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
|
|||
static BreakableToken::Split getStringSplit(StringRef Text,
|
||||
unsigned ContentStartColumn,
|
||||
unsigned ColumnLimit,
|
||||
unsigned TabWidth,
|
||||
encoding::Encoding Encoding) {
|
||||
// FIXME: Reduce unit test case.
|
||||
if (Text.empty())
|
||||
|
@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
|
|||
return BreakableToken::Split(StringRef::npos, 0);
|
||||
unsigned MaxSplit =
|
||||
std::min<unsigned>(ColumnLimit - ContentStartColumn,
|
||||
encoding::getCodePointCount(Text, Encoding) - 1);
|
||||
encoding::columnWidthWithTabs(Text, ContentStartColumn,
|
||||
TabWidth, Encoding) -
|
||||
1);
|
||||
StringRef::size_type SpaceOffset = 0;
|
||||
StringRef::size_type SlashOffset = 0;
|
||||
StringRef::size_type WordStartOffset = 0;
|
||||
|
@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
|
|||
Chars += Advance;
|
||||
} else {
|
||||
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
|
||||
Chars += 1;
|
||||
Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
|
||||
ContentStartColumn + Chars,
|
||||
TabWidth, Encoding);
|
||||
}
|
||||
|
||||
if (Chars > MaxSplit)
|
||||
|
@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
|
|||
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
|
||||
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
|
||||
return StartColumn + Prefix.size() + Postfix.size() +
|
||||
encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
|
||||
encoding::columnWidthWithTabs(Line.substr(Offset, Length),
|
||||
StartColumn + Prefix.size(),
|
||||
Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
BreakableSingleLineToken::BreakableSingleLineToken(
|
||||
const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
|
||||
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
|
||||
: BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
|
||||
Prefix(Prefix), Postfix(Postfix) {
|
||||
StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
|
||||
const FormatStyle &Style)
|
||||
: BreakableToken(Tok, InPPDirective, Encoding, Style),
|
||||
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
|
||||
assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
|
||||
Line = Tok.TokenText.substr(
|
||||
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
|
||||
|
@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken(
|
|||
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
|
||||
unsigned StartColumn,
|
||||
bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
encoding::Encoding Encoding,
|
||||
const FormatStyle &Style)
|
||||
: BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
|
||||
Encoding) {}
|
||||
Encoding, Style) {}
|
||||
|
||||
BreakableToken::Split
|
||||
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const {
|
||||
return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
|
||||
Encoding);
|
||||
Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
|
||||
|
@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
|
|||
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
|
||||
unsigned StartColumn,
|
||||
bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
encoding::Encoding Encoding,
|
||||
const FormatStyle &Style)
|
||||
: BreakableSingleLineToken(Token, StartColumn,
|
||||
getLineCommentPrefix(Token.TokenText), "",
|
||||
InPPDirective, Encoding) {
|
||||
InPPDirective, Encoding, Style) {
|
||||
OriginalPrefix = Prefix;
|
||||
if (Token.TokenText.size() > Prefix.size() &&
|
||||
isAlphanumeric(Token.TokenText[Prefix.size()])) {
|
||||
|
@ -195,7 +211,7 @@ BreakableToken::Split
|
|||
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const {
|
||||
return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
|
||||
ColumnLimit, Encoding);
|
||||
ColumnLimit, Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
||||
|
@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
|
|||
}
|
||||
|
||||
BreakableBlockComment::BreakableBlockComment(
|
||||
const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
|
||||
const FormatToken &Token, unsigned StartColumn,
|
||||
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: BreakableToken(Token, InPPDirective, Encoding) {
|
||||
encoding::Encoding Encoding, const FormatStyle &Style)
|
||||
: BreakableToken(Token, InPPDirective, Encoding, Style) {
|
||||
StringRef TokenText(Token.TokenText);
|
||||
assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
|
||||
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
|
||||
|
@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment(
|
|||
StartOfLineColumn.resize(Lines.size());
|
||||
StartOfLineColumn[0] = StartColumn + 2;
|
||||
for (size_t i = 1; i < Lines.size(); ++i)
|
||||
adjustWhitespace(Style, i, IndentDelta);
|
||||
adjustWhitespace(i, IndentDelta);
|
||||
|
||||
Decoration = "* ";
|
||||
if (Lines.size() == 1 && !FirstInLine) {
|
||||
|
@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment(
|
|||
});
|
||||
}
|
||||
|
||||
void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
|
||||
unsigned LineIndex,
|
||||
void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
|
||||
int IndentDelta) {
|
||||
// When in a preprocessor directive, the trailing backslash in a block comment
|
||||
// is not needed, but can serve a purpose of uniformity with necessary escaped
|
||||
|
@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
|
|||
if (StartOfLine == StringRef::npos)
|
||||
StartOfLine = Lines[LineIndex].size();
|
||||
|
||||
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
|
||||
// Adjust Lines to only contain relevant text.
|
||||
Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
|
||||
Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
|
||||
|
@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
|
|||
// if leading tabs are intermixed with spaces, that is not a high priority.
|
||||
|
||||
// Adjust the start column uniformly across all lines.
|
||||
StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
|
||||
StartOfLineColumn[LineIndex] =
|
||||
std::max<int>(0, Whitespace.size() + IndentDelta);
|
||||
}
|
||||
|
||||
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
|
||||
|
||||
unsigned BreakableBlockComment::getLineLengthAfterSplit(
|
||||
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
|
||||
return getContentStartColumn(LineIndex, Offset) +
|
||||
encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
|
||||
Encoding) +
|
||||
unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
|
||||
return ContentStartColumn +
|
||||
encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
|
||||
ContentStartColumn, Style.TabWidth,
|
||||
Encoding) +
|
||||
// The last line gets a "*/" postfix.
|
||||
(LineIndex + 1 == Lines.size() ? 2 : 0);
|
||||
}
|
||||
|
@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
|
|||
unsigned ColumnLimit) const {
|
||||
return getCommentSplit(Lines[LineIndex].substr(TailOffset),
|
||||
getContentStartColumn(LineIndex, TailOffset),
|
||||
ColumnLimit, Encoding);
|
||||
ColumnLimit, Style.TabWidth, Encoding);
|
||||
}
|
||||
|
||||
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
|
||||
|
|
|
@ -67,12 +67,14 @@ public:
|
|||
|
||||
protected:
|
||||
BreakableToken(const FormatToken &Tok, bool InPPDirective,
|
||||
encoding::Encoding Encoding)
|
||||
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
|
||||
encoding::Encoding Encoding, const FormatStyle &Style)
|
||||
: Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
|
||||
Style(Style) {}
|
||||
|
||||
const FormatToken &Tok;
|
||||
const bool InPPDirective;
|
||||
const encoding::Encoding Encoding;
|
||||
const FormatStyle &Style;
|
||||
};
|
||||
|
||||
/// \brief Base class for single line tokens that can be broken.
|
||||
|
@ -88,7 +90,8 @@ public:
|
|||
protected:
|
||||
BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
|
||||
StringRef Prefix, StringRef Postfix,
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding,
|
||||
const FormatStyle &Style);
|
||||
|
||||
// The column in which the token starts.
|
||||
unsigned StartColumn;
|
||||
|
@ -107,7 +110,8 @@ public:
|
|||
/// \p StartColumn specifies the column in which the token will start
|
||||
/// after formatting.
|
||||
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding,
|
||||
const FormatStyle &Style);
|
||||
|
||||
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const;
|
||||
|
@ -122,7 +126,8 @@ public:
|
|||
/// \p StartColumn specifies the column in which the comment will start
|
||||
/// after formatting.
|
||||
BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
|
||||
bool InPPDirective, encoding::Encoding Encoding);
|
||||
bool InPPDirective, encoding::Encoding Encoding,
|
||||
const FormatStyle &Style);
|
||||
|
||||
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
|
||||
unsigned ColumnLimit) const;
|
||||
|
@ -144,10 +149,10 @@ public:
|
|||
/// after formatting, while \p OriginalStartColumn specifies in which
|
||||
/// column the comment started before formatting.
|
||||
/// If the comment starts a line after formatting, set \p FirstInLine to true.
|
||||
BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
|
||||
unsigned StartColumn, unsigned OriginaStartColumn,
|
||||
bool FirstInLine, bool InPPDirective,
|
||||
encoding::Encoding Encoding);
|
||||
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
|
||||
unsigned OriginaStartColumn, bool FirstInLine,
|
||||
bool InPPDirective, encoding::Encoding Encoding,
|
||||
const FormatStyle &Style);
|
||||
|
||||
virtual unsigned getLineCount() const;
|
||||
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
|
||||
|
@ -172,8 +177,7 @@ private:
|
|||
// Sets StartOfLineColumn to the intended column in which the text at
|
||||
// Lines[LineIndex] starts (note that the decoration, if present, is not
|
||||
// considered part of the text).
|
||||
void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
|
||||
int IndentDelta);
|
||||
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
|
||||
|
||||
// Returns the column at which the text in line LineIndex starts, when broken
|
||||
// at TailOffset. Note that the decoration (if present) is not considered part
|
||||
|
|
|
@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
|
|||
State.Stack[i].BreakBeforeParameter = true;
|
||||
|
||||
unsigned ColumnsUsed =
|
||||
State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
|
||||
State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
|
||||
// We can only affect layout of the first and the last line, so the penalty
|
||||
// for all other lines is constant, and we ignore it.
|
||||
State.Column = Current.CodePointsInLastLine;
|
||||
State.Column = Current.LastLineColumnWidth;
|
||||
|
||||
if (ColumnsUsed > getColumnLimit(State))
|
||||
return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
|
||||
|
@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
return 0;
|
||||
|
||||
Token.reset(new BreakableStringLiteral(
|
||||
Current, StartColumn, State.Line->InPPDirective, Encoding));
|
||||
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
|
||||
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
|
||||
unsigned OriginalStartColumn =
|
||||
SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
|
||||
1;
|
||||
Token.reset(new BreakableBlockComment(
|
||||
Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
|
||||
State.Line->InPPDirective, Encoding));
|
||||
Current, StartColumn, OriginalStartColumn, !Current.Previous,
|
||||
State.Line->InPPDirective, Encoding, Style));
|
||||
} else if (Current.Type == TT_LineComment &&
|
||||
(Current.Previous == NULL ||
|
||||
Current.Previous->Type != TT_ImplicitStringLiteral)) {
|
||||
|
@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
// leading whitespace in consecutive lines when changing indentation of
|
||||
// the first line similar to what we do with block comments.
|
||||
if (Current.isMultiline()) {
|
||||
State.Column = StartColumn + Current.CodePointsInFirstLine;
|
||||
State.Column = StartColumn + Current.FirstLineColumnWidth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Token.reset(new BreakableLineComment(Current, StartColumn,
|
||||
State.Line->InPPDirective, Encoding));
|
||||
Token.reset(new BreakableLineComment(
|
||||
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include "clang/Basic/LLVM.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
#include "llvm/Support/Unicode.h"
|
||||
|
||||
namespace clang {
|
||||
namespace format {
|
||||
|
@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
|
|||
}
|
||||
}
|
||||
|
||||
/// \brief Returns the number of columns required to display the \p Text on a
|
||||
/// generic Unicode-capable terminal. Text is assumed to use the specified
|
||||
/// \p Encoding.
|
||||
inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
|
||||
if (Encoding == Encoding_UTF8) {
|
||||
int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
|
||||
if (ContentWidth >= 0)
|
||||
return ContentWidth;
|
||||
}
|
||||
return Text.size();
|
||||
}
|
||||
|
||||
/// \brief Returns the number of columns required to display the \p Text,
|
||||
/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
|
||||
/// text is assumed to use the specified \p Encoding.
|
||||
inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
|
||||
unsigned TabWidth, Encoding Encoding) {
|
||||
unsigned TotalWidth = 0;
|
||||
StringRef Tail = Text;
|
||||
for (;;) {
|
||||
StringRef::size_type TabPos = Tail.find('\t');
|
||||
if (TabPos == StringRef::npos)
|
||||
return TotalWidth + columnWidth(Tail, Encoding);
|
||||
int Width = columnWidth(Tail.substr(0, TabPos), Encoding);
|
||||
assert(Width >= 0);
|
||||
TotalWidth += Width;
|
||||
TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
|
||||
Tail = Tail.substr(TabPos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Gets the number of bytes in a sequence representing a single
|
||||
/// codepoint and starting with FirstChar in the specified Encoding.
|
||||
inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
|
||||
|
|
|
@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
|
|||
IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
|
||||
IO.mapOptional("Standard", Style.Standard);
|
||||
IO.mapOptional("IndentWidth", Style.IndentWidth);
|
||||
IO.mapOptional("TabWidth", Style.TabWidth);
|
||||
IO.mapOptional("UseTab", Style.UseTab);
|
||||
IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
|
||||
IO.mapOptional("IndentFunctionDeclarationAfterType",
|
||||
|
@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() {
|
|||
LLVMStyle.IndentCaseLabels = false;
|
||||
LLVMStyle.IndentFunctionDeclarationAfterType = false;
|
||||
LLVMStyle.IndentWidth = 2;
|
||||
LLVMStyle.TabWidth = 8;
|
||||
LLVMStyle.MaxEmptyLinesToKeep = 1;
|
||||
LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
|
||||
LLVMStyle.ObjCSpaceBeforeProtocolList = true;
|
||||
|
@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() {
|
|||
GoogleStyle.IndentCaseLabels = true;
|
||||
GoogleStyle.IndentFunctionDeclarationAfterType = true;
|
||||
GoogleStyle.IndentWidth = 2;
|
||||
GoogleStyle.TabWidth = 8;
|
||||
GoogleStyle.MaxEmptyLinesToKeep = 1;
|
||||
GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
|
||||
GoogleStyle.ObjCSpaceBeforeProtocolList = false;
|
||||
|
@ -629,7 +632,7 @@ private:
|
|||
++Column;
|
||||
break;
|
||||
case '\t':
|
||||
Column += Style.IndentWidth - Column % Style.IndentWidth;
|
||||
Column += Style.TabWidth - Column % Style.TabWidth;
|
||||
break;
|
||||
default:
|
||||
++Column;
|
||||
|
@ -681,10 +684,12 @@ private:
|
|||
StringRef Text = FormatTok->TokenText;
|
||||
size_t FirstNewlinePos = Text.find('\n');
|
||||
if (FirstNewlinePos != StringRef::npos) {
|
||||
FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
|
||||
Text.substr(0, FirstNewlinePos), Encoding);
|
||||
FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
|
||||
Text.substr(Text.find_last_of('\n') + 1), Encoding);
|
||||
// FIXME: Handle embedded tabs.
|
||||
FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
|
||||
FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
|
||||
Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
|
||||
Encoding);
|
||||
}
|
||||
}
|
||||
// FIXME: Add the CodePointCount to Column.
|
||||
|
|
|
@ -83,7 +83,7 @@ class AnnotatedLine;
|
|||
struct FormatToken {
|
||||
FormatToken()
|
||||
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
|
||||
CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
|
||||
CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
|
||||
IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
|
||||
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
|
||||
CanBreakBefore(false), ClosesTemplateDeclaration(false),
|
||||
|
@ -120,15 +120,15 @@ struct FormatToken {
|
|||
/// \brief Contains the width in columns of the first line of a
|
||||
/// multi-line string literal or comment. Zero if there's no newline in the
|
||||
/// token.
|
||||
unsigned CodePointsInFirstLine;
|
||||
unsigned FirstLineColumnWidth;
|
||||
|
||||
/// \brief Contains the width in columns of the last line of a
|
||||
/// multi-line string literal or comment. Can be zero for line comments.
|
||||
unsigned CodePointsInLastLine;
|
||||
unsigned LastLineColumnWidth;
|
||||
|
||||
/// \brief Returns \c true if the token text contains newlines (escaped or
|
||||
/// not).
|
||||
bool isMultiline() const { return CodePointsInFirstLine != 0; }
|
||||
bool isMultiline() const { return FirstLineColumnWidth != 0; }
|
||||
|
||||
/// \brief Indicates that this is the first token.
|
||||
bool IsFirst;
|
||||
|
|
|
@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) {
|
|||
if (!Style.UseTab)
|
||||
return std::string(Spaces, ' ');
|
||||
|
||||
return std::string(Spaces / Style.IndentWidth, '\t') +
|
||||
std::string(Spaces % Style.IndentWidth, ' ');
|
||||
return std::string(Spaces / Style.TabWidth, '\t') +
|
||||
std::string(Spaces % Style.TabWidth, ' ');
|
||||
}
|
||||
|
||||
} // namespace format
|
||||
|
|
|
@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
|
|||
"}",
|
||||
21, 0, Tab));
|
||||
|
||||
Tab.TabWidth = 4;
|
||||
Tab.IndentWidth = 8;
|
||||
verifyFormat("class TabWidth4Indent8 {\n"
|
||||
"\t\tvoid f() {\n"
|
||||
"\t\t\t\tsomeFunction(parameter1,\n"
|
||||
"\t\t\t\t\t\t\t parameter2);\n"
|
||||
"\t\t}\n"
|
||||
"};",
|
||||
Tab);
|
||||
|
||||
Tab.TabWidth = 4;
|
||||
Tab.IndentWidth = 4;
|
||||
verifyFormat("class TabWidth4Indent4 {\n"
|
||||
"\tvoid f() {\n"
|
||||
"\t\tsomeFunction(parameter1,\n"
|
||||
"\t\t\t\t\t parameter2);\n"
|
||||
"\t}\n"
|
||||
"};",
|
||||
Tab);
|
||||
|
||||
Tab.TabWidth = 8;
|
||||
Tab.IndentWidth = 4;
|
||||
verifyFormat("class TabWidth8Indent4 {\n"
|
||||
" void f() {\n"
|
||||
"\tsomeFunction(parameter1,\n"
|
||||
"\t\t parameter2);\n"
|
||||
" }\n"
|
||||
"};",
|
||||
Tab);
|
||||
|
||||
// FIXME: To correctly count mixed whitespace we need to
|
||||
// also correctly count mixed whitespace in front of the comment.
|
||||
//
|
||||
|
||||
// Tab.TabWidth = 8;
|
||||
// Tab.IndentWidth = 8;
|
||||
// EXPECT_EQ("/*\n"
|
||||
// "\t a\t\tcomment\n"
|
||||
// "\t in multiple lines\n"
|
||||
|
@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
|
|||
verifyFormat("\"Однажды в студёную зимнюю пору...\"",
|
||||
getLLVMStyleWithColumns(35));
|
||||
verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(21));
|
||||
getLLVMStyleWithColumns(31));
|
||||
verifyFormat("// Однажды в студёную зимнюю пору...",
|
||||
getLLVMStyleWithColumns(36));
|
||||
verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
|
||||
getLLVMStyleWithColumns(22));
|
||||
getLLVMStyleWithColumns(32));
|
||||
verifyFormat("/* Однажды в студёную зимнюю пору... */",
|
||||
getLLVMStyleWithColumns(39));
|
||||
verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
|
||||
getLLVMStyleWithColumns(25));
|
||||
getLLVMStyleWithColumns(35));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8Strings) {
|
||||
|
@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
|
|||
"\"пору,\"",
|
||||
format("\"Однажды, в студёную зимнюю пору,\"",
|
||||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("\"一 二 三 四 \"\n"
|
||||
"\"五 六 七 八 \"\n"
|
||||
"\"九 十\"",
|
||||
format("\"一 二 三 四 五 六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(10)));
|
||||
EXPECT_EQ("\"一 二 三 \"\n"
|
||||
"\"四 五六 \"\n"
|
||||
"\"七 八 九 \"\n"
|
||||
"\"十\"",
|
||||
format("\"一 二 三 四 五六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(11)));
|
||||
EXPECT_EQ("\"一\t二 \"\n"
|
||||
"\"\t三 \"\n"
|
||||
"\"四 五\t六 \"\n"
|
||||
"\"\t七 \"\n"
|
||||
"\"八九十\tqq\"",
|
||||
format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
|
||||
getLLVMStyleWithColumns(11)));
|
||||
}
|
||||
|
||||
|
||||
// Formats, under a 30-column limit, a declaration whose string literal holds
// double-width characters and an escaped newline; the expected output starts
// the literal on a new line after the '='.
TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
|
||||
EXPECT_EQ("const char *sssss =\n"
|
||||
" \"一二三四五六七八\\\n"
|
||||
" 九 十\";",
|
||||
format("const char *sssss = \"一二三四五六七八\\\n"
|
||||
" 九 十\";",
|
||||
getLLVMStyleWithColumns(30)));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8LineComments) {
|
||||
|
@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
|
|||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("// 一二三\n"
|
||||
"// 四五六七\n"
|
||||
"// 八\n"
|
||||
"// 九 十",
|
||||
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(6)));
|
||||
"// 八 九\n"
|
||||
"// 十",
|
||||
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(9)));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8BlockComments) {
|
||||
|
@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
|
|||
format("/* Гляжу, поднимается медленно в гору\n"
|
||||
" * Лошадка, везущая хворосту воз. */",
|
||||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("/* 一二三\n"
|
||||
" * 四五六七\n"
|
||||
" * 八\n"
|
||||
" * 九 十\n"
|
||||
" */",
|
||||
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(6)));
|
||||
EXPECT_EQ(
|
||||
"/* 一二三\n"
|
||||
" * 四五六七\n"
|
||||
" * 八 九\n"
|
||||
" * 十 */",
|
||||
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9)));
|
||||
EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
|
||||
" * 𝕓𝕪𝕥𝕖\n"
|
||||
" * 𝖀𝕿𝕱-𝟠 */",
|
||||
format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
|
||||
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
|
||||
FormatStyle Style = getLLVMStyle();
|
||||
|
||||
|
@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
|
|||
Style);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
TEST_F(FormatTest, FormatsWithWebKitStyle) {
|
||||
FormatStyle Style = getWebKitStyle();
|
||||
|
||||
|
|
Loading…
Reference in New Issue