From 0b58c328d4344329f71c7e3de7e280fbbdbb8ff8 Mon Sep 17 00:00:00 2001 From: Manuel Klimek Date: Fri, 1 Dec 2017 13:28:08 +0000 Subject: [PATCH] Better trade-off for excess characters vs. staying within the column limits. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we break a long line like: Column limit: 21 | // foo foo foo foo foo foo foo foo foo foo foo foo The local decision when to allow protruding vs. breaking can lead to this outcome (2 excess characters, 2 breaks): // foo foo foo foo foo // foo foo foo foo foo // foo foo While strictly staying within the column limit leads to this strictly better outcome (fully below the column limit, 2 breaks): // foo foo foo foo // foo foo foo foo // foo foo foo foo To get an optimal solution, we would need to consider all combinations of excess characters vs. breaking for all lines, but that would lead to a significant increase in the search space of the algorithm for little gain. Instead, we blindly try both approches and·select the one that leads to the overall lower penalty. Differential Revision: https://reviews.llvm.org/D40605 llvm-svn: 319541 --- clang/lib/Format/ContinuationIndenter.cpp | 54 ++++++++++++++++++----- clang/lib/Format/ContinuationIndenter.h | 17 +++++-- clang/unittests/Format/FormatTest.cpp | 26 ++++++++--- 3 files changed, 78 insertions(+), 19 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index c0d2bf6f64ce..e022a3a8c771 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1390,7 +1390,36 @@ unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, Penalty = addMultilineToken(Current, State); } else if (State.Line->Type != LT_ImportStatement) { // We generally don't break import statements. - Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun); + LineState OriginalState = State; + + // Whether we force the reflowing algorithm to stay strictly within the + // column limit. + bool Strict = false; + // Whether the first non-strict attempt at reflowing did intentionally + // exceed the column limit. + bool Exceeded = false; + std::tie(Penalty, Exceeded) = breakProtrudingToken( + Current, State, AllowBreak, /*DryRun=*/true, Strict); + if (Exceeded) { + // If non-strict reflowing exceeds the column limit, try whether strict + // reflowing leads to an overall lower penalty. + LineState StrictState = OriginalState; + unsigned StrictPenalty = + breakProtrudingToken(Current, StrictState, AllowBreak, + /*DryRun=*/true, /*Strict=*/true) + .first; + Strict = StrictPenalty <= Penalty; + if (Strict) { + Penalty = StrictPenalty; + State = StrictState; + } + } + if (!DryRun) { + // If we're not in dry-run mode, apply the changes with the decision on + // strictness made above. + breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false, + Strict); + } } if (State.Column > getColumnLimit(State)) { unsigned ExcessCharacters = State.Column - getColumnLimit(State); @@ -1480,14 +1509,14 @@ std::unique_ptr ContinuationIndenter::createBreakableToken( return nullptr; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool AllowBreak, - bool DryRun) { +std::pair +ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, bool AllowBreak, + bool DryRun, bool Strict) { std::unique_ptr Token = createBreakableToken(Current, State, AllowBreak); if (!Token) - return 0; + return {0, false}; assert(Token->getLineCount() > 0); unsigned ColumnLimit = getColumnLimit(State); if (Current.is(TT_LineComment)) { @@ -1495,13 +1524,16 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, ColumnLimit = Style.ColumnLimit; } if (Current.UnbreakableTailLength >= ColumnLimit) - return 0; + return {0, false}; // ColumnWidth was already accounted into State.Column before calling // breakProtrudingToken. unsigned StartColumn = State.Column - Current.ColumnWidth; unsigned NewBreakPenalty = Current.isStringLiteral() ? Style.PenaltyBreakString : Style.PenaltyBreakComment; + // Stores whether we intentionally decide to let a line exceed the column + // limit. + bool Exceeded = false; // Stores whether we introduce a break anywhere in the token. bool BreakInserted = Token->introducesBreakBeforeToken(); // Store whether we inserted a new line break at the end of the previous @@ -1612,7 +1644,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, bool ContinueOnLine = ContentStartColumn + ToNextSplitColumns <= ColumnLimit; unsigned ExcessCharactersPenalty = 0; - if (!ContinueOnLine) { + if (!ContinueOnLine && !Strict) { // Similarly, if the excess characters' penalty is lower than the // penalty of introducing a new break, continue on the current line. ExcessCharactersPenalty = @@ -1621,8 +1653,10 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty << "\n break : " << NewBreakPenalty << "\n"); - if (ExcessCharactersPenalty < NewBreakPenalty) + if (ExcessCharactersPenalty < NewBreakPenalty) { + Exceeded = true; ContinueOnLine = true; + } } if (ContinueOnLine) { DEBUG(llvm::dbgs() << " Continuing on line...\n"); @@ -1817,7 +1851,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Token->updateNextToken(State); - return Penalty; + return {Penalty, Exceeded}; } unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { diff --git a/clang/lib/Format/ContinuationIndenter.h b/clang/lib/Format/ContinuationIndenter.h index 5bdd555109a0..ded7bfab4267 100644 --- a/clang/lib/Format/ContinuationIndenter.h +++ b/clang/lib/Format/ContinuationIndenter.h @@ -123,14 +123,25 @@ private: /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// - /// \returns An extra penalty if a token was broken, otherwise 0. + /// \returns A pair (penalty, exceeded), where penalty is the extra penalty + /// when tokens are broken or lines exceed the column limit, and exceeded + /// indicates whether the algorithm purposefully left lines exceeding the + /// column limit. /// /// The returned penalty will cover the cost of the additional line breaks /// and column limit violation in all lines except for the last one. The /// penalty for the column limit violation in the last line (and in single /// line tokens) is handled in \c addNextStateToQueue. - unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, - bool AllowBreak, bool DryRun); + /// + /// \p Strict indicates whether reflowing is allowed to leave characters + /// protruding the column limit; if true, lines will be split strictly within + /// the column limit where possible; if false, words are allowed to protrude + /// over the column limit as long as the penalty is less than the penalty + /// of a break. + std::pair breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool AllowBreak, bool DryRun, + bool Strict); /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr /// if the current token cannot be broken. diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 7be9817b7a95..6f4bd6d69d0d 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -9934,8 +9934,8 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { Style.PenaltyExcessCharacter = 90; verifyFormat("int a; // the comment", Style); EXPECT_EQ("int a; // the comment\n" - " // aa", - format("int a; // the comment aa", Style)); + " // aaa", + format("int a; // the comment aaa", Style)); EXPECT_EQ("int a; /* first line\n" " * second line\n" " * third line\n" @@ -9963,14 +9963,14 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", format("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", format("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", Style)); // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the @@ -9996,6 +9996,20 @@ TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { "// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); + + // Make sure we do not keep protruding characters if strict mode reflow is + // cheaper than keeping protruding characters. + Style.ColumnLimit = 21; + EXPECT_EQ("// foo foo foo foo\n" + "// foo foo foo foo\n" + "// foo foo foo foo\n", + format("// foo foo foo foo foo foo foo foo foo foo foo foo\n", + Style)); + + EXPECT_EQ("int a = /* long block\n" + " comment */\n" + " 42;", + format("int a = /* long block comment */ 42;", Style)); } #define EXPECT_ALL_STYLES_EQUAL(Styles) \