forked from OSchip/llvm-project
Better trade-off for excess characters vs. staying within the column limits.
When we break a long line like: Column limit: 21 | // foo foo foo foo foo foo foo foo foo foo foo foo The local decision when to allow protruding vs. breaking can lead to this outcome (2 excess characters, 2 breaks): // foo foo foo foo foo // foo foo foo foo foo // foo foo While strictly staying within the column limit leads to this strictly better outcome (fully below the column limit, 2 breaks): // foo foo foo foo // foo foo foo foo // foo foo foo foo To get an optimal solution, we would need to consider all combinations of excess characters vs. breaking for all lines, but that would lead to a significant increase in the search space of the algorithm for little gain. Instead, we blindly try both approches and·select the one that leads to the overall lower penalty. Differential Revision: https://reviews.llvm.org/D40605 llvm-svn: 319541
This commit is contained in:
parent
2dc4ff1cde
commit
0b58c328d4
|
@ -1390,7 +1390,36 @@ unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
|
|||
Penalty = addMultilineToken(Current, State);
|
||||
} else if (State.Line->Type != LT_ImportStatement) {
|
||||
// We generally don't break import statements.
|
||||
Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun);
|
||||
LineState OriginalState = State;
|
||||
|
||||
// Whether we force the reflowing algorithm to stay strictly within the
|
||||
// column limit.
|
||||
bool Strict = false;
|
||||
// Whether the first non-strict attempt at reflowing did intentionally
|
||||
// exceed the column limit.
|
||||
bool Exceeded = false;
|
||||
std::tie(Penalty, Exceeded) = breakProtrudingToken(
|
||||
Current, State, AllowBreak, /*DryRun=*/true, Strict);
|
||||
if (Exceeded) {
|
||||
// If non-strict reflowing exceeds the column limit, try whether strict
|
||||
// reflowing leads to an overall lower penalty.
|
||||
LineState StrictState = OriginalState;
|
||||
unsigned StrictPenalty =
|
||||
breakProtrudingToken(Current, StrictState, AllowBreak,
|
||||
/*DryRun=*/true, /*Strict=*/true)
|
||||
.first;
|
||||
Strict = StrictPenalty <= Penalty;
|
||||
if (Strict) {
|
||||
Penalty = StrictPenalty;
|
||||
State = StrictState;
|
||||
}
|
||||
}
|
||||
if (!DryRun) {
|
||||
// If we're not in dry-run mode, apply the changes with the decision on
|
||||
// strictness made above.
|
||||
breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
|
||||
Strict);
|
||||
}
|
||||
}
|
||||
if (State.Column > getColumnLimit(State)) {
|
||||
unsigned ExcessCharacters = State.Column - getColumnLimit(State);
|
||||
|
@ -1480,14 +1509,14 @@ std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
||||
LineState &State,
|
||||
bool AllowBreak,
|
||||
bool DryRun) {
|
||||
std::pair<unsigned, bool>
|
||||
ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
||||
LineState &State, bool AllowBreak,
|
||||
bool DryRun, bool Strict) {
|
||||
std::unique_ptr<const BreakableToken> Token =
|
||||
createBreakableToken(Current, State, AllowBreak);
|
||||
if (!Token)
|
||||
return 0;
|
||||
return {0, false};
|
||||
assert(Token->getLineCount() > 0);
|
||||
unsigned ColumnLimit = getColumnLimit(State);
|
||||
if (Current.is(TT_LineComment)) {
|
||||
|
@ -1495,13 +1524,16 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
ColumnLimit = Style.ColumnLimit;
|
||||
}
|
||||
if (Current.UnbreakableTailLength >= ColumnLimit)
|
||||
return 0;
|
||||
return {0, false};
|
||||
// ColumnWidth was already accounted into State.Column before calling
|
||||
// breakProtrudingToken.
|
||||
unsigned StartColumn = State.Column - Current.ColumnWidth;
|
||||
unsigned NewBreakPenalty = Current.isStringLiteral()
|
||||
? Style.PenaltyBreakString
|
||||
: Style.PenaltyBreakComment;
|
||||
// Stores whether we intentionally decide to let a line exceed the column
|
||||
// limit.
|
||||
bool Exceeded = false;
|
||||
// Stores whether we introduce a break anywhere in the token.
|
||||
bool BreakInserted = Token->introducesBreakBeforeToken();
|
||||
// Store whether we inserted a new line break at the end of the previous
|
||||
|
@ -1612,7 +1644,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
bool ContinueOnLine =
|
||||
ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
|
||||
unsigned ExcessCharactersPenalty = 0;
|
||||
if (!ContinueOnLine) {
|
||||
if (!ContinueOnLine && !Strict) {
|
||||
// Similarly, if the excess characters' penalty is lower than the
|
||||
// penalty of introducing a new break, continue on the current line.
|
||||
ExcessCharactersPenalty =
|
||||
|
@ -1621,8 +1653,10 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
DEBUG(llvm::dbgs()
|
||||
<< " Penalty excess: " << ExcessCharactersPenalty
|
||||
<< "\n break : " << NewBreakPenalty << "\n");
|
||||
if (ExcessCharactersPenalty < NewBreakPenalty)
|
||||
if (ExcessCharactersPenalty < NewBreakPenalty) {
|
||||
Exceeded = true;
|
||||
ContinueOnLine = true;
|
||||
}
|
||||
}
|
||||
if (ContinueOnLine) {
|
||||
DEBUG(llvm::dbgs() << " Continuing on line...\n");
|
||||
|
@ -1817,7 +1851,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
|
|||
|
||||
Token->updateNextToken(State);
|
||||
|
||||
return Penalty;
|
||||
return {Penalty, Exceeded};
|
||||
}
|
||||
|
||||
unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
|
||||
|
|
|
@ -123,14 +123,25 @@ private:
|
|||
/// \brief If the current token sticks out over the end of the line, break
|
||||
/// it if possible.
|
||||
///
|
||||
/// \returns An extra penalty if a token was broken, otherwise 0.
|
||||
/// \returns A pair (penalty, exceeded), where penalty is the extra penalty
|
||||
/// when tokens are broken or lines exceed the column limit, and exceeded
|
||||
/// indicates whether the algorithm purposefully left lines exceeding the
|
||||
/// column limit.
|
||||
///
|
||||
/// The returned penalty will cover the cost of the additional line breaks
|
||||
/// and column limit violation in all lines except for the last one. The
|
||||
/// penalty for the column limit violation in the last line (and in single
|
||||
/// line tokens) is handled in \c addNextStateToQueue.
|
||||
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
|
||||
bool AllowBreak, bool DryRun);
|
||||
///
|
||||
/// \p Strict indicates whether reflowing is allowed to leave characters
|
||||
/// protruding the column limit; if true, lines will be split strictly within
|
||||
/// the column limit where possible; if false, words are allowed to protrude
|
||||
/// over the column limit as long as the penalty is less than the penalty
|
||||
/// of a break.
|
||||
std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
|
||||
LineState &State,
|
||||
bool AllowBreak, bool DryRun,
|
||||
bool Strict);
|
||||
|
||||
/// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
|
||||
/// if the current token cannot be broken.
|
||||
|
|
|
@ -9934,8 +9934,8 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
|
|||
Style.PenaltyExcessCharacter = 90;
|
||||
verifyFormat("int a; // the comment", Style);
|
||||
EXPECT_EQ("int a; // the comment\n"
|
||||
" // aa",
|
||||
format("int a; // the comment aa", Style));
|
||||
" // aaa",
|
||||
format("int a; // the comment aaa", Style));
|
||||
EXPECT_EQ("int a; /* first line\n"
|
||||
" * second line\n"
|
||||
" * third line\n"
|
||||
|
@ -9963,14 +9963,14 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
|
|||
Style));
|
||||
|
||||
EXPECT_EQ("// foo bar baz bazfoo\n"
|
||||
"// foo bar\n",
|
||||
"// foo bar foo bar\n",
|
||||
format("// foo bar baz bazfoo\n"
|
||||
"// foo bar\n",
|
||||
"// foo bar foo bar\n",
|
||||
Style));
|
||||
EXPECT_EQ("// foo bar baz bazfoo\n"
|
||||
"// foo bar\n",
|
||||
"// foo bar foo bar\n",
|
||||
format("// foo bar baz bazfoo\n"
|
||||
"// foo bar\n",
|
||||
"// foo bar foo bar\n",
|
||||
Style));
|
||||
|
||||
// FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
|
||||
|
@ -9996,6 +9996,20 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) {
|
|||
"// foo bar baz bazfoo bar\n"
|
||||
"// foo bar\n",
|
||||
Style));
|
||||
|
||||
// Make sure we do not keep protruding characters if strict mode reflow is
|
||||
// cheaper than keeping protruding characters.
|
||||
Style.ColumnLimit = 21;
|
||||
EXPECT_EQ("// foo foo foo foo\n"
|
||||
"// foo foo foo foo\n"
|
||||
"// foo foo foo foo\n",
|
||||
format("// foo foo foo foo foo foo foo foo foo foo foo foo\n",
|
||||
Style));
|
||||
|
||||
EXPECT_EQ("int a = /* long block\n"
|
||||
" comment */\n"
|
||||
" 42;",
|
||||
format("int a = /* long block comment */ 42;", Style));
|
||||
}
|
||||
|
||||
#define EXPECT_ALL_STYLES_EQUAL(Styles) \
|
||||
|
|
Loading…
Reference in New Issue