diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 320913c2d46f..1b1827e3f9a9 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -13,27 +13,82 @@ /// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "format-token-breaker" + #include "BreakableToken.h" +#include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include namespace clang { namespace format { +namespace { -BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, - unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine(LineIndex).substr(TailOffset); - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); +// FIXME: Move helper string functions to where it makes sense. + +unsigned getOctalLength(StringRef Text) { + unsigned I = 1; + while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { + ++I; + } + return I; +} + +unsigned getHexLength(StringRef Text) { + unsigned I = 2; // Point after '\x'. + while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || + (Text[I] >= 'a' && Text[I] <= 'f') || + (Text[I] >= 'A' && Text[I] <= 'F'))) { + ++I; + } + return I; +} + +unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; + + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': + return getHexLength(Text); + default: + if (Text[1] >= '0' && Text[1] <= '7') + return getOctalLength(Text); + return 2; + } +} + +StringRef::size_type getStartOfCharacter(StringRef Text, + StringRef::size_type Offset) { + StringRef::size_type NextEscape = Text.find('\\'); + while (NextEscape != StringRef::npos && NextEscape < Offset) { + StringRef::size_type SequenceLength = + getEscapeSequenceLength(Text.substr(NextEscape)); + if (Offset < NextEscape + SequenceLength) + return NextEscape; + NextEscape = Text.find('\\', NextEscape + SequenceLength); + } + return Offset; +} + +BreakableToken::Split getCommentSplit(StringRef Text, + unsigned ContentStartColumn, + unsigned ColumnLimit) { if (ColumnLimit <= ContentStartColumn + 1) - return Split(StringRef::npos, 0); + return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); if (SpaceOffset == StringRef::npos || - Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) { + // Don't break at leading whitespace. + Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) SpaceOffset = Text.find(' ', MaxSplit); - } if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); @@ -43,143 +98,248 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, return BreakableToken::Split(StringRef::npos, 0); } -void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, bool InPPDirective, - WhitespaceManager &Whitespaces) { - StringRef Text = getLine(LineIndex).substr(TailOffset); - StringRef AdditionalPrefix = Decoration; - if (Text.size() == Split.first + Split.second) { - // For all but the last line handle trailing space in trimLine. - if (LineIndex < Lines.size() - 1) - return; - // For the last line we need to break before "*/", but not to add "* ". - AdditionalPrefix = ""; - } +BreakableToken::Split getStringSplit(StringRef Text, + unsigned ContentStartColumn, + unsigned ColumnLimit) { - unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, - InPPDirective, IndentAtLineBreak); + if (ColumnLimit <= ContentStartColumn) + return BreakableToken::Split(StringRef::npos, 0); + unsigned MaxSplit = ColumnLimit - ContentStartColumn; + // FIXME: Reduce unit test case. + if (Text.empty()) + return BreakableToken::Split(StringRef::npos, 0); + MaxSplit = std::min(MaxSplit, Text.size() - 1); + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) + return BreakableToken::Split(SpaceOffset + 1, 0); + StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); + if (SlashOffset != StringRef::npos && SlashOffset != 0) + return BreakableToken::Split(SlashOffset + 1, 0); + StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); + if (SplitPoint == StringRef::npos || SplitPoint == 0) + return BreakableToken::Split(StringRef::npos, 0); + return BreakableToken::Split(SplitPoint, 0); } -BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) { - assert(TokenText.startswith("/*") && TokenText.endswith("*/")); +} // namespace - OriginalStartColumn = - SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1; +unsigned BreakableSingleLineToken::getLineCount() const { return 1; } - TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); - - bool NeedsStar = true; - CommonPrefixLength = UINT_MAX; - if (Lines.size() == 1) { - if (Token.Parent == 0) { - // Standalone block comments will be aligned and prefixed with *s. - CommonPrefixLength = OriginalStartColumn + 1; - } else { - // Trailing comments can start on arbitrary column, and available - // horizontal space can be too small to align consecutive lines with - // the first one. We could, probably, align them to current - // indentation level, but now we just wrap them without indentation - // and stars. - CommonPrefixLength = 0; - NeedsStar = false; - } - } else { - for (size_t i = 1; i < Lines.size(); ++i) { - size_t FirstNonWhitespace = Lines[i].find_first_not_of(" "); - if (FirstNonWhitespace != StringRef::npos) { - NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*'); - CommonPrefixLength = - std::min(CommonPrefixLength, FirstNonWhitespace); - } - } - } - if (CommonPrefixLength == UINT_MAX) - CommonPrefixLength = 0; - - Decoration = NeedsStar ? "* " : ""; - - IndentAtLineBreak = - std::max(StartColumn - OriginalStartColumn + CommonPrefixLength, 0); +unsigned +BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return StartColumn + Prefix.size() + Postfix.size() + Line.size() - + TailOffset; } -void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) { - SourceLocation TokenLoc = Tok.getStartOfNonWhitespace(); - int IndentDelta = (StartColumn - 2) - OriginalStartColumn; - if (IndentDelta > 0) { - std::string WhiteSpace(IndentDelta, ' '); - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0, - WhiteSpace); - } - } else if (IndentDelta < 0) { - std::string WhiteSpace(-IndentDelta, ' '); - // Check that the line is indented enough. - for (size_t i = 1; i < Lines.size(); ++i) { - if (!Lines[i].startswith(WhiteSpace)) - return; - } - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), - -IndentDelta, ""); - } - } - - for (unsigned i = 1; i < Lines.size(); ++i) - Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size()); +void BreakableSingleLineToken::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + bool InPPDirective, + WhitespaceManager &Whitespaces) { + Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first, + Split.second, Postfix, Prefix, InPPDirective, + StartColumn); } -void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) { - if (LineIndex == Lines.size() - 1) - return; - StringRef Text = Lines[LineIndex].substr(TailOffset); - - // FIXME: The algorithm for trimming a line should naturally yield a - // non-change if there is nothing to trim; removing this line breaks the - // algorithm; investigate the root cause, and make sure to either document - // why exactly this is needed for remove it. - if (!Text.endswith(" ") && !InPPDirective) - return; - - StringRef TrimmedLine = Text.rtrim(); - unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); - unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; - // FIXME: It seems like we're misusing the call to breakToken to remove - // whitespace instead of breaking a token. We should make this an explicit - // call option to the WhitespaceManager, or handle trimming and alignment - // of comments completely within in the WhitespaceManger. Passing '0' here - // and relying on this not breaking assumptions of the WhitespaceManager seems - // like a bad idea. - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, - 0); +BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok, + unsigned StartColumn, + StringRef Prefix, + StringRef Postfix) + : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix), + Postfix(Postfix) { + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); + Line = Tok.TokenText.substr( + Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } -BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) { - assert(TokenText.startswith("//")); - Decoration = getLineCommentPrefix(TokenText); - Lines.push_back(TokenText.substr(Decoration.size())); - IndentAtLineBreak = StartColumn; - this->StartColumn += Decoration.size(); // Start column of the contents. +BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, + unsigned StartColumn) + : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {} + +BreakableToken::Split +BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit); } -StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) { +static StringRef getLineCommentPrefix(StringRef Comment) { const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) + for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) if (Comment.startswith(KnownPrefixes[i])) return KnownPrefixes[i]; return ""; } +BreakableLineComment::BreakableLineComment(const FormatToken &Token, + unsigned StartColumn) + : BreakableSingleLineToken(Token, StartColumn, + getLineCommentPrefix(Token.TokenText), "") {} + +BreakableToken::Split +BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), + ColumnLimit); +} + +BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style, + const FormatToken &Token, + unsigned StartColumn, + unsigned OriginalStartColumn, + bool FirstInLine) + : BreakableToken(Token) { + StringRef TokenText(Token.TokenText); + assert(TokenText.startswith("/*") && TokenText.endswith("*/")); + TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); + + int IndentDelta = StartColumn - OriginalStartColumn; + bool NeedsStar = true; + LeadingWhitespace.resize(Lines.size()); + StartOfLineColumn.resize(Lines.size()); + if (Lines.size() == 1 && !FirstInLine) { + // Comments for which FirstInLine is false can start on arbitrary column, + // and available horizontal space can be too small to align consecutive + // lines with the first one. + // FIXME: We could, probably, align them to current indentation level, but + // now we just wrap them without stars. + NeedsStar = false; + } + StartOfLineColumn[0] = StartColumn + 2; + for (size_t i = 1; i < Lines.size(); ++i) { + adjustWhitespace(Style, i, IndentDelta); + if (Lines[i].empty()) + // If the last line is empty, the closing "*/" will have a star. + NeedsStar = NeedsStar && i + 1 == Lines.size(); + else + NeedsStar = NeedsStar && Lines[i][0] == '*'; + } + Decoration = NeedsStar ? "* " : ""; + IndentAtLineBreak = StartOfLineColumn[0] + 1; + for (size_t i = 1; i < Lines.size(); ++i) { + if (Lines[i].empty()) { + if (!NeedsStar && i + 1 != Lines.size()) + // For all but the last line (which always ends in */), set the + // start column to 0 if they're empty, so we do not insert + // trailing whitespace anywhere. + StartOfLineColumn[i] = 0; + continue; + } + if (NeedsStar) { + // The first line already excludes the star. + // For all other lines, adjust the line to exclude the star and + // (optionally) the first whitespace. + int Offset = Lines[i].startswith("* ") ? 2 : 1; + StartOfLineColumn[i] += Offset; + Lines[i] = Lines[i].substr(Offset); + LeadingWhitespace[i] += Offset; + } + IndentAtLineBreak = std::min(IndentAtLineBreak, StartOfLineColumn[i]); + } + DEBUG({ + for (size_t i = 0; i < Lines.size(); ++i) { + llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] + << "\n"; + } + }); +} + +void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, + unsigned LineIndex, + int IndentDelta) { + // Calculate the end of the non-whitespace text in the previous line. + size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t"); + if (EndOfPreviousLine == StringRef::npos) + EndOfPreviousLine = 0; + else + ++EndOfPreviousLine; + // Calculate the start of the non-whitespace text in the current line. + size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t"); + if (StartOfLine == StringRef::npos) + StartOfLine = Lines[LineIndex].size(); + // FIXME: Tabs are not always 8 characters. Make configurable in the style. + unsigned Column = 0; + StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine); + for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) { + if (Lines[LineIndex][i] == '\t') + Column += 8 - (Column % 8); + else + ++Column; + } + + // Adjust Lines to only contain relevant text. + Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); + Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); + // Adjust LeadingWhitespace to account all whitespace between the lines + // to the current line. + LeadingWhitespace[LineIndex] = + Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + // Adjust the start column uniformly accross all lines. + StartOfLineColumn[LineIndex] = std::max(0, Column + IndentDelta); +} + +unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } + +unsigned +BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return getContentStartColumn(LineIndex, TailOffset) + + (Lines[LineIndex].size() - TailOffset) + + // The last line gets a "*/" postfix. + (LineIndex + 1 == Lines.size() ? 2 : 0); +} + +BreakableToken::Split +BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Lines[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), + ColumnLimit); +} + +void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, bool InPPDirective, + WhitespaceManager &Whitespaces) { + StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Prefix = Decoration; + if (LineIndex + 1 == Lines.size() && + Text.size() == Split.first + Split.second) { + // For the last line we need to break before "*/", but not to add "* ". + Prefix = ""; + } + + unsigned BreakOffsetInToken = + Text.data() - Tok.TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, + InPPDirective, IndentAtLineBreak - Decoration.size()); +} + +void +BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) { + if (LineIndex == 0) + return; + StringRef Prefix = Decoration; + if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty()) + Prefix = ""; + + unsigned WhitespaceOffsetInToken = + Lines[LineIndex].data() - Tok.TokenText.data() - + LeadingWhitespace[LineIndex]; + Whitespaces.breakToken( + Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, + InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size()); +} + +unsigned +BreakableBlockComment::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + // If we break, we always break at the predefined indent. + if (TailOffset != 0) + return IndentAtLineBreak; + return StartOfLineColumn[LineIndex]; +} + } // namespace format } // namespace clang diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h index c7ba044cf1e0..03904c2a4680 100644 --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -24,215 +24,174 @@ namespace clang { namespace format { +struct FormatStyle; + +/// \brief Base class for strategies on how to break tokens. +/// +/// FIXME: The interface seems set in stone, so we might want to just pull the +/// strategy into the class, instead of controlling it from the outside. class BreakableToken { public: - BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : Tok(Tok), StartColumn(StartColumn), - TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()), - Tok.TokenLength) {} + // Contains starting character index and length of split. + typedef std::pair Split; + virtual ~BreakableToken() {} + + /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - virtual unsigned getLineSize(unsigned Index) const = 0; + + /// \brief Returns the rest of the length of the line at \p LineIndex, + /// when broken at \p TailOffset. + /// + /// Note that previous breaks are not taken into account. \p TailOffset + /// is always specified from the start of the (original) line. virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset) const = 0; - // Contains starting character index and length of split. - typedef std::pair Split; + /// \brief Returns a range (offset, length) at which to break the line at + /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not + /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const = 0; + + /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, bool InPPDirective, WhitespaceManager &Whitespaces) = 0; - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) {} + + /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) {} + protected: + BreakableToken(const FormatToken &Tok) : Tok(Tok) {} + const FormatToken &Tok; - unsigned StartColumn; - StringRef TokenText; }; -class BreakableStringLiteral : public BreakableToken { +/// \brief Base class for single line tokens that can be broken. +/// +/// \c getSplit() needs to be implemented by child classes. +class BreakableSingleLineToken : public BreakableToken { public: - BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) { - assert(TokenText.startswith("\"") && TokenText.endswith("\"")); - } - - virtual unsigned getLineCount() const { return 1; } - - virtual unsigned getLineSize(unsigned Index) const { - return Tok.TokenLength - 2; // Should be in sync with getLine - } - + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getDecorationLength() + getLine().size() - TailOffset; - } - - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine().substr(TailOffset); - if (ColumnLimit <= getDecorationLength()) - return Split(StringRef::npos, 0); - unsigned MaxSplit = ColumnLimit - getDecorationLength(); - // FIXME: Reduce unit test case. - if (Text.empty()) - return Split(StringRef::npos, 0); - MaxSplit = std::min(MaxSplit, Text.size() - 1); - StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) - return Split(SpaceOffset + 1, 0); - StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); - if (SlashOffset != StringRef::npos && SlashOffset != 0) - return Split(SlashOffset + 1, 0); - StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); - if (SplitPoint != StringRef::npos && SplitPoint > 1) - // Do not split at 0. - return Split(SplitPoint, 0); - return Split(StringRef::npos, 0); - } - - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) { - Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, - "\"", "\"", InPPDirective, StartColumn); - } - -private: - StringRef getLine() const { - // Get string without quotes. - // FIXME: Handle string prefixes. - return TokenText.substr(1, TokenText.size() - 2); - } - - unsigned getDecorationLength() const { return StartColumn + 2; } - - static StringRef::size_type getStartOfCharacter(StringRef Text, - StringRef::size_type Offset) { - StringRef::size_type NextEscape = Text.find('\\'); - while (NextEscape != StringRef::npos && NextEscape < Offset) { - StringRef::size_type SequenceLength = - getEscapeSequenceLength(Text.substr(NextEscape)); - if (Offset < NextEscape + SequenceLength) - return NextEscape; - NextEscape = Text.find('\\', NextEscape + SequenceLength); - } - return Offset; - } - - static unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': - return getHexLength(Text); - default: - if (Text[1] >= '0' && Text[1] <= '7') - return getOctalLength(Text); - return 2; - } - } - - static unsigned getHexLength(StringRef Text) { - unsigned I = 2; // Point after '\x'. - while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || - (Text[I] >= 'a' && Text[I] <= 'f') || - (Text[I] >= 'A' && Text[I] <= 'F'))) { - ++I; - } - return I; - } - - static unsigned getOctalLength(StringRef Text) { - unsigned I = 1; - while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { - ++I; - } - return I; - } - -}; - -class BreakableComment : public BreakableToken { -public: - virtual unsigned getLineSize(unsigned Index) const { - return getLine(Index).size(); - } - - virtual unsigned getLineCount() const { return Lines.size(); } - - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getContentStartColumn(LineIndex, TailOffset) + - getLine(LineIndex).size() - TailOffset; - } - - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const; + unsigned TailOffset) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, bool InPPDirective, WhitespaceManager &Whitespaces); protected: - BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) {} + BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix); - // Get comment lines without /* */, common prefix and trailing whitespace. - // Last line is not trimmed, as it is terminated by */, so its trailing - // whitespace is not really trailing. - StringRef getLine(unsigned Index) const { - return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index]; - } - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - return (TailOffset == 0 && LineIndex == 0) - ? StartColumn - : IndentAtLineBreak + Decoration.size(); - } - - unsigned IndentAtLineBreak; - StringRef Decoration; - SmallVector Lines; + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; }; -class BreakableBlockComment : public BreakableComment { +class BreakableStringLiteral : public BreakableSingleLineToken { public: - BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + /// \brief Creates a breakable token for a single line string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn); - void alignLines(WhitespaceManager &Whitespaces); + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; +}; +class BreakableLineComment : public BreakableSingleLineToken { +public: + /// \brief Creates a breakable token for a line comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting. + BreakableLineComment(const FormatToken &Token, unsigned StartColumn); + + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; +}; + +class BreakableBlockComment : public BreakableToken { +public: + /// \brief Creates a breakable token for a block comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting, while \p OriginalStartColumn specifies in which + /// column the comment started before formatting. + /// If the comment starts a line after formatting, set \p FirstInLine to true. + BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token, + unsigned StartColumn, unsigned OriginaStartColumn, + bool FirstInLine); + + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) + - (LineIndex + 1 < Lines.size() ? 0 : 2); - } - - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, WhitespaceManager &Whitespaces); + unsigned TailOffset) const; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces); private: - unsigned OriginalStartColumn; - unsigned CommonPrefixLength; -}; + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], + // so that all whitespace between the lines is accounted to Lines[LineIndex] + // as leading whitespace: + // - Lines[LineIndex] points to the text after that whitespace + // - Lines[LineIndex-1] shrinks by its trailing whitespace + // - LeadingWhitespace[LineIndex] is updated with the complete whitespace + // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // + // Sets StartOfLineColumn to the intended column in which the text at + // Lines[LineIndex] starts (note that the decoration, if present, is not + // considered part of the text). + void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex, + int IndentDelta); -class BreakableLineComment : public BreakableComment { -public: - BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + // Returns the column at which the text in line LineIndex starts, when broken + // at TailOffset. Note that the decoration (if present) is not considered part + // of the text. + unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; -private: - static StringRef getLineCommentPrefix(StringRef Comment); + // Contains the text of the lines of the block comment, excluding the leading + // /* in the first line and trailing */ in the last line, and excluding all + // trailing whitespace between the lines. Note that the decoration (if + // present) is also not considered part of the text. + SmallVector Lines; + + // LeadingWhitespace[i] is the number of characters regarded as whitespace in + // front of Lines[i]. Note that this can include "* " sequences, which we + // regard as whitespace when all lines have a "*" prefix. + SmallVector LeadingWhitespace; + + // StartOfLineColumn[i] is the target column at which Line[i] should be. + // Note that this excludes a leading "* " or "*" in case all lines have + // a "*" prefix. + SmallVector StartOfLineColumn; + + // The column at which the text of a broken line should start. + // Note that an optional decoration would go before that column. + // IndentAtLineBreak is a uniform position for all lines in a block comment, + // regardless of their relative position. + // FIXME: Revisit the decision to do this; the main reason was to support + // patterns like + // /**************//** + // * Comment + // We could also support such patterns by special casing the first line + // instead. + unsigned IndentAtLineBreak; + + // Either "* " if all lines begin with a "*", or empty. + StringRef Decoration; }; } // namespace format diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index b4ea90462a83..ea8a0f9f7d10 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -802,6 +802,9 @@ private: unsigned UnbreakableTailLength = Current.UnbreakableTailLength; llvm::OwningPtr Token; unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; + unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber( + Current.FormatTok.getStartOfNonWhitespace()) - 1; + if (Current.is(tok::string_literal) && Current.Type != TT_ImplicitStringLiteral) { // Only break up default narrow strings. @@ -810,18 +813,16 @@ private: if (!LiteralData || *LiteralData != '"') return 0; - Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok, - StartColumn)); + Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn)); } else if (Current.Type == TT_BlockComment) { BreakableBlockComment *BBC = - new BreakableBlockComment(SourceMgr, Current, StartColumn); - if (!DryRun) - BBC->alignLines(Whitespaces); + new BreakableBlockComment(Style, Current.FormatTok, StartColumn, + OriginalStartColumn, !Current.Parent); Token.reset(BBC); } else if (Current.Type == TT_LineComment && (Current.Parent == NULL || Current.Parent->Type != TT_ImplicitStringLiteral)) { - Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn)); + Token.reset(new BreakableLineComment(Current.FormatTok, StartColumn)); } else { return 0; } @@ -832,8 +833,12 @@ private: bool BreakInserted = false; unsigned Penalty = 0; unsigned PositionAfterLastLineInToken = 0; - for (unsigned LineIndex = 0; LineIndex < Token->getLineCount(); - ++LineIndex) { + for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); + LineIndex != EndIndex; ++LineIndex) { + if (!DryRun) { + Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective, + Whitespaces); + } unsigned TailOffset = 0; unsigned RemainingTokenLength = Token->getLineLengthAfterSplit(LineIndex, TailOffset); @@ -845,8 +850,7 @@ private: assert(Split.first != 0); unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit( LineIndex, TailOffset + Split.first + Split.second); - if (NewRemainingTokenLength >= RemainingTokenLength) - break; + assert(NewRemainingTokenLength < RemainingTokenLength); if (!DryRun) { Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective, Whitespaces); @@ -857,9 +861,6 @@ private: BreakInserted = true; } PositionAfterLastLineInToken = RemainingTokenLength; - if (!DryRun) { - Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces); - } } if (BreakInserted) { @@ -1084,8 +1085,8 @@ private: class LexerBasedFormatTokenSource : public FormatTokenSource { public: LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) - : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), - IdentTable(Lex.getLangOpts()) { + : GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), + SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) { Lex.SetKeepWhitespaceMode(true); } @@ -1102,12 +1103,13 @@ public: FormatTok = FormatToken(); Lex.LexFromRawLexer(FormatTok.Tok); StringRef Text = rawTokenText(FormatTok.Tok); - SourceLocation WhitespaceStart = FormatTok.Tok.getLocation(); + SourceLocation WhitespaceStart = + FormatTok.Tok.getLocation().getLocWithOffset(-TrailingWhitespace); if (SourceMgr.getFileOffset(WhitespaceStart) == 0) FormatTok.IsFirst = true; // Consume and record whitespace until we find a significant token. - unsigned WhitespaceLength = 0; + unsigned WhitespaceLength = TrailingWhitespace; while (FormatTok.Tok.is(tok::unknown)) { unsigned Newlines = Text.count('\n'); if (Newlines > 0) @@ -1130,9 +1132,10 @@ public: // Now FormatTok is the next non-whitespace token. FormatTok.TokenLength = Text.size(); + TrailingWhitespace = 0; if (FormatTok.Tok.is(tok::comment)) { - FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size(); - FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength; + TrailingWhitespace = Text.size() - Text.rtrim().size(); + FormatTok.TokenLength -= TrailingWhitespace; } // In case the token starts with escaped newlines, we want to @@ -1163,6 +1166,9 @@ public: FormatTok.WhitespaceRange = SourceRange( WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); + FormatTok.TokenText = StringRef( + SourceMgr.getCharacterData(FormatTok.getStartOfNonWhitespace()), + FormatTok.TokenLength); return FormatTok; } @@ -1171,6 +1177,7 @@ public: private: FormatToken FormatTok; bool GreaterStashed; + unsigned TrailingWhitespace; Lexer &Lex; SourceManager &SourceMgr; IdentifierTable IdentTable; diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index c738ef13c6d4..3359745a9ff2 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -28,9 +28,8 @@ namespace format { /// whitespace characters preceeding it. struct FormatToken { FormatToken() - : NewlinesBefore(0), HasUnescapedNewline(false), - LastNewlineOffset(0), TokenLength(0), IsFirst(false), - MustBreakBefore(false), TrailingWhiteSpaceLength(0) {} + : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), + TokenLength(0), IsFirst(false), MustBreakBefore(false) {} /// \brief The \c Token. Token Tok; @@ -66,9 +65,6 @@ struct FormatToken { /// before the token. bool MustBreakBefore; - /// \brief Number of characters of trailing whitespace. - unsigned TrailingWhiteSpaceLength; - /// \brief Returns actual token start location without leading escaped /// newlines and whitespace. /// @@ -77,6 +73,12 @@ struct FormatToken { SourceLocation getStartOfNonWhitespace() const { return WhitespaceRange.getEnd(); } + + /// \brief The raw text of the token. + /// + /// Contains the raw token text without leading whitespace and without leading + /// escaped newlines. + StringRef TokenText; }; /// \brief An unwrapped line is a sequence of \c Token, that we would like to diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index f7b65fc72cf2..daf63697b0ce 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -46,19 +46,6 @@ void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok, true, Tok.FormatTok.WhitespaceRange, Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(), InPPDirective && !Tok.FormatTok.IsFirst)); - - // Align line comments if they are trailing or if they continue other - // trailing comments. - // FIXME: Pull this out and generalize so it works the same way in broken - // comments and unbroken comments with trailing whitespace. - if (Tok.isTrailingComment()) { - SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace() - .getLocWithOffset(Tok.FormatTok.TokenLength); - // Remove the comment's trailing whitespace. - if (Tok.FormatTok.TrailingWhiteSpaceLength != 0) - Replaces.insert(tooling::Replacement( - SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, "")); - } } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, @@ -86,12 +73,6 @@ void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset, tok::unknown, InPPDirective && !Tok.IsFirst)); } -void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc, - unsigned ReplaceChars, StringRef Text) { - Replaces.insert( - tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text)); -} - const tooling::Replacements &WhitespaceManager::generateReplacements() { if (Changes.empty()) return Replaces; diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h index 701ecff4e055..cdd8f1d10f3f 100644 --- a/clang/lib/Format/WhitespaceManager.h +++ b/clang/lib/Format/WhitespaceManager.h @@ -67,14 +67,6 @@ public: /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); - /// \brief Replaces \p ReplaceChars after \p SourceLoc with \p Text. - /// - /// FIXME: This is currently used to align comments outside of the \c - /// WhitespaceManager. Once this has been moved inside, get rid of this - /// method. - void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars, - StringRef Text); - private: /// \brief Represents a change before a token, a break inside a token, /// or the layout of an unchanged token (or whitespace within). diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 73fea60519fe..d237c0b2f865 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -766,8 +766,8 @@ TEST_F(FormatTest, AlignsMultiLineComments) { " 1.1.1. to keep the formatting.\n" " */")); EXPECT_EQ("/*\n" - " Don't try to outdent if there's not enough inentation.\n" - " */", + "Don't try to outdent if there's not enough inentation.\n" + "*/", format(" /*\n" " Don't try to outdent if there's not enough inentation.\n" " */")); @@ -808,6 +808,65 @@ TEST_F(FormatTest, SplitsLongCxxComments) { format("// A comment before a macro definition\n" "#define a b", getLLVMStyleWithColumns(20))); + + EXPECT_EQ("/* A comment before\n" + " * a macro\n" + " * definition */\n" + "#define a b", + format("/* A comment before a macro definition */\n" + "#define a b", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("/* some comment\n" + " * a comment\n" + "* that we break\n" + " * another comment\n" + "* we have to break\n" + "* a left comment\n" + " */", + format(" /* some comment\n" + " * a comment that we break\n" + " * another comment we have to break\n" + "* a left comment\n" + " */", + getLLVMStyleWithColumns(20))); + + EXPECT_EQ("/*\n" + "\n" + "\n" + " */\n", + format(" /* \n" + " \n" + " \n" + " */\n")); +} + +TEST_F(FormatTest, MultiLineCommentsInDefines) { + // FIXME: The line breaks are still suboptimal (current guess + // is that this is due to the token length being misused), but + // the comment handling is correct. + EXPECT_EQ("#define A( \\\n" + " x) /* \\\n" + "a comment \\\n" + "inside */ \\\n" + " f();", + format("#define A(x) /* \\\n" + " a comment \\\n" + " inside */ \\\n" + " f();", + getLLVMStyleWithColumns(17))); + EXPECT_EQ("#define A(x) /* \\\n" + " a \\\n" + " comment \\\n" + " inside \\\n" + " */ \\\n" + " f();", + format("#define A( \\\n" + " x) /* \\\n" + " a comment \\\n" + " inside */ \\\n" + " f();", + getLLVMStyleWithColumns(17))); } TEST_F(FormatTest, ParsesCommentsAdjacentToPPDirectives) { @@ -928,7 +987,8 @@ TEST_F(FormatTest, SplitsLongLinesInComments) { " */", getLLVMStyleWithColumns(20))); EXPECT_EQ("{\n" " if (something) /* This is a\n" - "long comment */\n" + " long\n" + " comment */\n" " ;\n" "}", format("{\n" @@ -4435,6 +4495,35 @@ TEST_F(FormatTest, ConfigurableUseOfTab) { "\t\t parameter2); \\\n" "\t}", Tab); + EXPECT_EQ("/*\n" + "\t a\t\tcomment\n" + "\t in multiple lines\n" + " */", + format(" /*\t \t \n" + " \t \t a\t\tcomment\t \t\n" + " \t \t in multiple lines\t\n" + " \t */", + Tab)); + Tab.UseTab = false; + // FIXME: Change this test to a different tab size than + // 8 once configurable. + EXPECT_EQ("/*\n" + " a\t\tcomment\n" + " in multiple lines\n" + " */", + format(" /*\t \t \n" + " \t \t a\t\tcomment\t \t\n" + " \t \t in multiple lines\t\n" + " \t */", + Tab)); + + // FIXME: This is broken, as the spelling column number we + // get from the SourceManager counts tab as '1'. + // EXPECT_EQ("/* some\n" + // " comment */", + // format(" \t \t /* some\n" + // " \t \t comment */", + // Tab)); } TEST_F(FormatTest, LinuxBraceBreaking) {