clang-format: Add column layout formatting for braced lists

With this patch, braced lists (with more than 3 elements) are formatted in a
column layout if possible. E.g.:

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

Required other changes:
- FormatTokens can now have a special role that contains extra data and can
  perform special formatting. A comma-separated list is currently the only
  implementation.
- Move penalty calculation entirely into ContinuationIndenter (there was a last
  piece still in UnwrappedLineFormatter).

Review: http://llvm-reviews.chandlerc.com/D1457
llvm-svn: 189018
This commit is contained in:
Daniel Jasper 2013-08-22 15:00:41 +00:00
parent effabf9691
commit 8de9ed05b7
8 changed files with 354 additions and 34 deletions

View File

@ -4,6 +4,7 @@ add_clang_library(clangFormat
BreakableToken.cpp
ContinuationIndenter.cpp
Format.cpp
FormatToken.cpp
TokenAnnotator.cpp
UnwrappedLineParser.cpp
WhitespaceManager.cpp

View File

@ -176,13 +176,14 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
}
unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
bool DryRun) {
bool DryRun,
unsigned ExtraSpaces) {
const FormatToken &Current = *State.NextToken;
const FormatToken &Previous = *State.NextToken->Previous;
// Extra penalty that needs to be added because of the way certain line
// breaks are chosen.
unsigned ExtraPenalty = 0;
unsigned Penalty = 0;
if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
// FIXME: Is this correct?
@ -199,13 +200,20 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
unsigned ContinuationIndent =
std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
if (Newline) {
// The first line break on any ParenLevel causes an extra penalty in order
// to prefer similar line breaks.
if (!State.Stack.back().ContainsLineBreak)
Penalty += 15;
State.Stack.back().ContainsLineBreak = true;
Penalty += State.NextToken->SplitPenalty;
// Breaking before the first "<<" is generally not desirable if the LHS is
// short.
if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
State.Column <= Style.ColumnLimit / 2)
ExtraPenalty += Style.PenaltyBreakFirstLessLess;
Penalty += Style.PenaltyBreakFirstLessLess;
State.Stack.back().ContainsLineBreak = true;
if (Current.is(tok::r_brace)) {
if (Current.BlockKind == BK_BracedInit)
State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
@ -333,7 +341,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
State.Stack.back().LastSpace = State.Stack.back().VariablePos;
}
unsigned Spaces = State.NextToken->SpacesRequiredBefore;
unsigned Spaces = State.NextToken->SpacesRequiredBefore + ExtraSpaces;
if (!DryRun)
Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column + Spaces);
@ -395,7 +403,7 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
}
}
return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
return moveStateToNextToken(State, DryRun, Newline) + Penalty;
}
unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
@ -542,11 +550,20 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.NextToken = State.NextToken->Next;
if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
Current.is(tok::string_literal) && Current.CanBreakBefore)
return 0;
unsigned Penalty = 0;
if (Newline || !Style.AlwaysBreakBeforeMultilineStrings ||
Current.isNot(tok::string_literal) || !Current.CanBreakBefore)
Penalty += breakProtrudingToken(Current, State, DryRun);
return breakProtrudingToken(Current, State, DryRun);
// If the previous has a special role, let it consume tokens as appropriate.
// It is necessary to start at the previous token for the only implemented
// role (comma separated list). That way, the decision whether or not to break
// after the "{" is already done and both options are tried and evaluated.
// FIXME: This is ugly, find a better way.
if (Previous && Previous->Role)
Penalty += Previous->Role->format(State, this, DryRun);
return Penalty;
}
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

View File

@ -60,7 +60,8 @@ public:
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun);
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
unsigned ExtraSpaces = 0);
/// \brief Get the column limit for this line. This is the style's column
/// limit, potentially reduced for preprocessor definitions.

View File

@ -438,14 +438,14 @@ private:
}
for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
I != E; ++I) {
unsigned Penalty = Indenter->addTokenToState(State, (*I)->NewLine, false);
DEBUG({
if ((*I)->NewLine) {
llvm::dbgs() << "Penalty for splitting before "
llvm::dbgs() << "Penalty for placing "
<< (*I)->Previous->State.NextToken->Tok.getName() << ": "
<< (*I)->Previous->State.NextToken->SplitPenalty << "\n";
<< Penalty << "\n";
}
});
Indenter->addTokenToState(State, (*I)->NewLine, false);
}
}
@ -459,11 +459,6 @@ private:
return;
if (!NewLine && Indenter->mustBreak(PreviousNode->State))
return;
if (NewLine) {
if (!PreviousNode->State.Stack.back().ContainsLineBreak)
Penalty += 15;
Penalty += PreviousNode->State.NextToken->SplitPenalty;
}
StateNode *Node = new (Allocator.Allocate())
StateNode(PreviousNode->State, NewLine, PreviousNode);

View File

@ -0,0 +1,184 @@
//===--- FormatToken.cpp - Format C++ code --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements specific functions of \c FormatTokens and their
/// roles.
///
//===----------------------------------------------------------------------===//
#include "FormatToken.h"
#include "ContinuationIndenter.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
namespace clang {
namespace format {
// Out-of-line definition of the virtual destructor; it has nothing to release.
TokenRole::~TokenRole() {}
// Default hook: the base role precomputes nothing. Derived roles override this
// to derive formatting data from \p Token.
void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
// Formats the braced list belonging to this role in a column layout.
//
// Starting from \p State, tokens are placed one by one through \p Indenter
// until the list's closing brace is the next token. Items are padded with
// extra spaces so that they line up with the column sizes of the chosen
// ColumnFormat, and a line break is inserted whenever a row is full or a token
// demands a break.
//
// Returns the sum of the penalties reported by the indenter, or 0 (leaving
// \p State untouched) if the list has too few commas, does not start right
// after an opening brace, is a designated initializer, or no precomputed
// format fits.
unsigned CommaSeparatedList::format(LineState &State,
                                    ContinuationIndenter *Indenter,
                                    bool DryRun) {
  if (!State.NextToken->Previous || !State.NextToken->Previous->Previous ||
      Commas.size() <= 2)
    return 0;

  // Ensure that we start on the opening brace.
  const FormatToken *LBrace = State.NextToken->Previous->Previous;
  if (LBrace->isNot(tok::l_brace) ||
      LBrace->Next->Type == TT_DesignatedInitializerPeriod)
    return 0;

  // Find the best ColumnFormat, i.e. the best number of columns to use.
  // The subtraction is done on unsigned values; if the indent already reaches
  // the column limit it would wrap around to a huge number and make every
  // layout look like it fits. Treat that case as "no room left" instead.
  unsigned Indent = State.Stack.back().Indent;
  unsigned RemainingCharacters =
      Indent < Style.ColumnLimit ? Style.ColumnLimit - Indent : 0;
  const ColumnFormat *Format = getColumnFormat(RemainingCharacters);
  if (!Format)
    return 0;

  // Format the entire list.
  unsigned Penalty = 0;
  unsigned Column = 0;
  unsigned Item = 0;
  while (State.NextToken != LBrace->MatchingParen) {
    bool NewLine = false;
    unsigned ExtraSpaces = 0;

    // If the previous token was one of our commas, we are now on the next item.
    if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
      // Trailing comments stay attached to the item they follow and therefore
      // neither get padded nor advance the column.
      if (!State.NextToken->isTrailingComment()) {
        ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
        ++Column;
      }
      ++Item;
    }

    if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
      Column = 0;
      NewLine = true;
    }

    // Place token using the continuation indenter and store the penalty.
    Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
  }
  return Penalty;
}
// Computes how many code points the token range [Begin, End] occupies (both
// ends included) when the whole range is assumed to sit on a single line.
static unsigned CodePointsBetween(const FormatToken *Begin,
                                  const FormatToken *End) {
  // Distance between the two tokens' TotalLength values; this excludes Begin
  // itself, so Begin's own code points are added back below.
  unsigned Span = End->TotalLength - Begin->TotalLength;
  return Span + Begin->CodePointCount;
}
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
if (!Token->MatchingParen)
return;
FormatToken *ItemBegin = Token->Next;
SmallVector<bool, 8> MustBreakBeforeItem;
// The lengths of an item if it is put at the end of the line. This includes
// trailing comments which are otherwise ignored for column alignment.
SmallVector<unsigned, 8> EndOfLineItemLength;
for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
// Skip comments on their own line.
while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
ItemBegin = ItemBegin->Next;
MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
const FormatToken *ItemEnd = NULL;
if (i == Commas.size()) {
ItemEnd = Token->MatchingParen;
const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
if (Style.Cpp11BracedListStyle) {
// In Cpp11 braced list style, the } and possibly other subsequent
// tokens will need to stay on a line with the last element.
while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
ItemEnd = ItemEnd->Next;
} else {
// In other braced lists styles, the "}" can be wrapped to the new line.
ItemEnd = Token->MatchingParen->Previous;
}
} else {
ItemEnd = Commas[i];
// The comma is counted as part of the item when calculating the length.
ItemLengths.push_back(ItemEnd->TotalLength - ItemBegin->TotalLength +
ItemBegin->CodePointCount);
// Consume trailing comments so the are included in EndOfLineItemLength.
if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
ItemEnd->Next->isTrailingComment())
ItemEnd = ItemEnd->Next;
}
EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
ItemBegin = ItemEnd->Next;
}
// We can never place more than ColumnLimit / 3 items in a row (because of the
// spaces and the comma).
for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
ColumnFormat Format;
Format.Columns = Columns;
Format.ColumnSizes.resize(Columns);
Format.LineCount = 0;
bool HasRowWithSufficientColumns = false;
unsigned Column = 0;
for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
if (MustBreakBeforeItem[i] || Column == Columns) {
++Format.LineCount;
Column = 0;
}
if (Column == Columns - 1)
HasRowWithSufficientColumns = true;
unsigned length =
(Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
Format.ColumnSizes[Column] =
std::max(Format.ColumnSizes[Column], length);
++Column;
}
// If all rows are terminated early (e.g. by trailing comments), we don't
// need to look further.
if (!HasRowWithSufficientColumns)
break;
Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
for (unsigned i = 0; i < Columns; ++i) {
Format.TotalWidth += Format.ColumnSizes[i];
}
// Ignore layouts that are bound to violate the column limit.
if (Format.TotalWidth > Style.ColumnLimit)
continue;
Formats.push_back(Format);
}
}
// Selects the precomputed ColumnFormat to use when \p RemainingCharacters are
// available, or NULL if none of them fits.
//
// The stored formats are visited back to front. Formats wider than the
// available space are skipped; among the fitting ones the search keeps moving
// on to earlier entries for as long as they do not need more lines than the
// current choice.
const CommaSeparatedList::ColumnFormat *
CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
  typedef SmallVector<ColumnFormat, 4>::const_reverse_iterator FormatIter;

  const ColumnFormat *Chosen = NULL;
  for (FormatIter It = Formats.rbegin(), End = Formats.rend(); It != End;
       ++It) {
    const ColumnFormat &Candidate = *It;
    if (Candidate.TotalWidth > RemainingCharacters)
      continue; // Too wide for the space we have.
    if (Chosen != NULL && Candidate.LineCount > Chosen->LineCount)
      return Chosen; // This candidate needs more lines; stop searching.
    Chosen = &Candidate;
  }
  return Chosen;
}
} // namespace format
} // namespace clang

View File

@ -17,7 +17,9 @@
#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/OwningPtr.h"
namespace clang {
namespace format {
@ -72,6 +74,8 @@ enum ParameterPackingKind {
PPK_Inconclusive
};
class TokenRole;
/// \brief A wrapper around a \c Token storing information about the
/// whitespace characters preceding it.
struct FormatToken {
@ -143,7 +147,10 @@ struct FormatToken {
TokenType Type;
/// \brief The number of spaces that should be inserted before this token.
unsigned SpacesRequiredBefore;
/// \brief \c true if it is allowed to break before this token.
bool CanBreakBefore;
bool ClosesTemplateDeclaration;
@ -155,6 +162,10 @@ struct FormatToken {
/// the number of commas.
unsigned ParameterCount;
/// \brief A token can have a special role that can carry extra information
/// about the token's formatting.
llvm::OwningPtr<TokenRole> Role;
/// \brief If this is an opening parenthesis, how are the parameters packed?
ParameterPackingKind PackingKind;
@ -300,6 +311,78 @@ private:
void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
};
class ContinuationIndenter;
struct LineState;
/// \brief Base class for the special role a \c FormatToken can carry.
///
/// A role stores extra, token-specific information and may take over the
/// formatting of the tokens that follow via \c format(). The hooks defined
/// inline here are no-ops (\c format() reports a penalty of 0), so derived
/// roles only override what they need.
class TokenRole {
public:
/// \brief Creates a role bound to \p Style. Only a reference is stored, so
/// the style presumably has to outlive the role.
TokenRole(const FormatStyle &Style) : Style(Style) {}
/// \brief Virtual so that roles can be destroyed through a \c TokenRole
/// pointer; declared here, defined outside the class.
virtual ~TokenRole();
/// \brief After the \c TokenAnnotator has finished annotating all the tokens,
/// this function precomputes required information for formatting.
virtual void precomputeFormattingInfos(const FormatToken *Token);
/// \brief Apply the special formatting that the given role demands.
///
/// Continues formatting from \p State leaving indentation to \p Indenter and
/// returns the total penalty that this formatting incurs.
virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
bool DryRun) {
return 0;
}
/// \brief Notifies the \c Role that a comma was found.
virtual void CommaFound(const FormatToken *Token) {}
protected:
/// \brief The style this role was created with; accessible to derived roles.
const FormatStyle &Style;
};
/// \brief A \c TokenRole for a list whose items are separated by commas.
///
/// The commas are collected through \c CommaFound(); the role then precomputes
/// candidate column layouts and applies a fitting one in \c format().
class CommaSeparatedList : public TokenRole {
public:
/// \brief Creates an empty list role bound to \p Style.
CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
/// \brief Measures the list's items and fills in the candidate column
/// formats. \p Token is the token this role is attached to.
virtual void precomputeFormattingInfos(const FormatToken *Token);
/// \brief Formats the list in a column layout starting at \p State and
/// returns the penalty incurred, or 0 if no column layout is applied.
virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
bool DryRun);
/// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
private:
/// \brief A struct that holds information on how to format a given list with
/// a specific number of columns.
struct ColumnFormat {
/// \brief The number of columns to use.
unsigned Columns;
/// \brief The total width in characters.
unsigned TotalWidth;
/// \brief The number of lines required for this format.
unsigned LineCount;
/// \brief The size of each column in characters.
SmallVector<unsigned, 8> ColumnSizes;
};
/// \brief Calculate which \c ColumnFormat fits best into
/// \p RemainingCharacters.
const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
/// \brief The ordered \c FormatTokens making up the commas of this list.
SmallVector<const FormatToken *, 8> Commas;
/// \brief The length of each of the list's items in characters including the
/// trailing comma.
SmallVector<unsigned, 8> ItemLengths;
/// \brief Precomputed formats that can be used for this list.
SmallVector<ColumnFormat, 4> Formats;
};
} // namespace format
} // namespace clang

View File

@ -29,9 +29,11 @@ namespace {
/// into template parameter lists.
class AnnotatingParser {
public:
AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in)
: Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false),
NameFound(false), AutoFound(false), Ident_in(Ident_in) {
AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
IdentifierInfo &Ident_in)
: Style(Style), Line(Line), CurrentToken(Line.First),
KeywordVirtualFound(false), NameFound(false), AutoFound(false),
Ident_in(Ident_in) {
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
}
@ -268,6 +270,9 @@ private:
void updateParameterCount(FormatToken *Left, FormatToken *Current) {
if (Current->is(tok::comma)) {
++Left->ParameterCount;
if (!Left->Role)
Left->Role.reset(new CommaSeparatedList(Style));
Left->Role->CommaFound(Current);
} else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
Left->ParameterCount = 1;
}
@ -827,6 +832,7 @@ private:
SmallVector<Context, 8> Contexts;
const FormatStyle &Style;
AnnotatedLine &Line;
FormatToken *CurrentToken;
bool KeywordVirtualFound;
@ -937,7 +943,7 @@ private:
} // end anonymous namespace
void TokenAnnotator::annotate(AnnotatedLine &Line) {
AnnotatingParser Parser(Line, Ident_in);
AnnotatingParser Parser(Style, Line, Ident_in);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
@ -1007,6 +1013,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
}
calculateUnbreakableTailLengths(Line);
for (Current = Line.First; Current != NULL; Current = Current->Next) {
if (Current->Role)
Current->Role->precomputeFormattingInfos(Current);
}
DEBUG({
printDebugInfo(Line);
});

View File

@ -1691,16 +1691,6 @@ TEST_F(FormatTest, StaticInitializers) {
" 100000000, \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
"};");
verifyFormat(
"static SomeClass = { a, b, c, d, e, f, g, h, i, j,\n"
" looooooooooooooooooooooooooooooooooongname,\n"
" looooooooooooooooooooooooooooooong };");
// Allow bin-packing in static initializers as this would often lead to
// terrible results, e.g.:
verifyGoogleFormat(
"static SomeClass = {a, b, c, d, e, f, g, h, i, j,\n"
" looooooooooooooooooooooooooooooooooongname,\n"
" looooooooooooooooooooooooooooooong};");
// Here, everything other than the "}" would fit on a line.
verifyFormat("static int LooooooooooooooooooooooooongVariable[1] = {\n"
" 100000000000000000000000\n"
@ -1782,7 +1772,7 @@ TEST_F(FormatTest, NestedStaticInitializers) {
"struct {\n"
" unsigned bit;\n"
" const char *const name;\n"
"} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
"} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
" { kOsLinux, \"Linux\" }, { kOsCrOS, \"Chrome OS\" } };");
}
@ -4152,6 +4142,44 @@ TEST_F(FormatTest, LayoutCxx11ConstructorBraceInitializers) {
NoSpaces);
}
// Checks the column layout of braced initializer lists. Each verifyFormat call
// passes the expected formatted output for one scenario.
TEST_F(FormatTest, FormatsBracedListsinColumnLayout) {
// A list of integers with differing widths that repeats the same seven values
// on every row.
verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777 };");
// Line comments on their own line inside the list; one of them follows a row
// that holds fewer items than the others.
verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" // line comment\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555,\n"
" // line comment\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777 };");
// A trailing comment after an item in the middle of the list.
verifyFormat(
"vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
" 1, 22, 333, 4444, 55555, 666666, // comment\n"
" 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
" 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
" 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
" 7777777 };");
// A register table where the items start on the line after the opening brace
// and the closing brace is on a line of its own.
verifyFormat("static const uint16_t CallerSavedRegs64Bittttt[] = {\n"
" X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,\n"
" X86::R8, X86::R9, X86::R10, X86::R11, 0\n"
"};");
// Short lists checked with reduced column limits of 39 and 38.
verifyFormat("vector<int> x = { 1, 1, 1, 1,\n"
" 1, 1, 1, 1 };",
getLLVMStyleWithColumns(39));
verifyFormat("vector<int> x = { 1, 1, 1, 1,\n"
" 1, 1, 1, 1 };",
getLLVMStyleWithColumns(38));
// With a column limit of 40 and a long variable name, all items are expected
// on a single line between the braces.
verifyFormat("vector<int> aaaaaaaaaaaaaaaaaaaaaa = {\n"
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1\n"
"};",
getLLVMStyleWithColumns(40));
}
TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
verifyFormat("void f() { return 42; }");
verifyFormat("void f() {\n"