//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
|
|
#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
|
|
|
|
#include "Encoding.h"
|
|
#include "FormatToken.h"
|
|
#include "clang/Format/Format.h"
|
|
#include "llvm/Support/Regex.h"
|
|
#include <map>
|
|
#include <tuple>
|
|
|
|
namespace clang {
|
|
class SourceManager;
|
|
|
|
namespace format {
|
|
|
|
// Forward declarations of the types that this header only refers to by
// pointer or reference, or that are defined further down in this header.
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
|
|
|
|
struct RawStringFormatStyleManager {
|
|
llvm::StringMap<FormatStyle> DelimiterStyle;
|
|
llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
|
|
|
|
RawStringFormatStyleManager(const FormatStyle &CodeStyle);
|
|
|
|
llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
|
|
|
|
llvm::Optional<FormatStyle>
|
|
getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
|
|
};
|
|
|
|
class ContinuationIndenter {
|
|
public:
|
|
/// Constructs a \c ContinuationIndenter to format \p Line starting in
|
|
/// column \p FirstIndent.
|
|
ContinuationIndenter(const FormatStyle &Style,
|
|
const AdditionalKeywords &Keywords,
|
|
const SourceManager &SourceMgr,
|
|
WhitespaceManager &Whitespaces,
|
|
encoding::Encoding Encoding,
|
|
bool BinPackInconclusiveFunctions);
|
|
|
|
/// Get the initial state, i.e. the state after placing \p Line's
|
|
/// first token at \p FirstIndent. When reformatting a fragment of code, as in
|
|
/// the case of formatting inside raw string literals, \p FirstStartColumn is
|
|
/// the column at which the state of the parent formatter is.
|
|
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
|
|
const AnnotatedLine *Line, bool DryRun);
|
|
|
|
// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
|
|
// better home.
|
|
/// Returns \c true, if a line break after \p State is allowed.
|
|
bool canBreak(const LineState &State);
|
|
|
|
/// Returns \c true, if a line break after \p State is mandatory.
|
|
bool mustBreak(const LineState &State);
|
|
|
|
/// Appends the next token to \p State and updates information
|
|
/// necessary for indentation.
|
|
///
|
|
/// Puts the token on the current line if \p Newline is \c false and adds a
|
|
/// line break and necessary indentation otherwise.
|
|
///
|
|
/// If \p DryRun is \c false, also creates and stores the required
|
|
/// \c Replacement.
|
|
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
|
|
unsigned ExtraSpaces = 0);
|
|
|
|
/// Get the column limit for this line. This is the style's column
|
|
/// limit, potentially reduced for preprocessor definitions.
|
|
unsigned getColumnLimit(const LineState &State) const;
|
|
|
|
private:
|
|
/// Mark the next token as consumed in \p State and modify its stacks
|
|
/// accordingly.
|
|
unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
|
|
|
|
/// Update 'State' according to the next token's fake left parentheses.
|
|
void moveStatePastFakeLParens(LineState &State, bool Newline);
|
|
/// Update 'State' according to the next token's fake r_parens.
|
|
void moveStatePastFakeRParens(LineState &State);
|
|
|
|
/// Update 'State' according to the next token being one of "(<{[".
|
|
void moveStatePastScopeOpener(LineState &State, bool Newline);
|
|
/// Update 'State' according to the next token being one of ")>}]".
|
|
void moveStatePastScopeCloser(LineState &State);
|
|
/// Update 'State' with the next token opening a nested block.
|
|
void moveStateToNewBlock(LineState &State);
|
|
|
|
/// Reformats a raw string literal.
|
|
///
|
|
/// \returns An extra penalty induced by reformatting the token.
|
|
unsigned reformatRawStringLiteral(const FormatToken &Current,
|
|
LineState &State,
|
|
const FormatStyle &RawStringStyle,
|
|
bool DryRun);
|
|
|
|
/// If the current token is at the end of the current line, handle
|
|
/// the transition to the next line.
|
|
unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
|
|
bool DryRun, bool AllowBreak);
|
|
|
|
/// If \p Current is a raw string that is configured to be reformatted,
|
|
/// return the style to be used.
|
|
llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
|
|
const LineState &State);
|
|
|
|
/// If the current token sticks out over the end of the line, break
|
|
/// it if possible.
|
|
///
|
|
/// \returns A pair (penalty, exceeded), where penalty is the extra penalty
|
|
/// when tokens are broken or lines exceed the column limit, and exceeded
|
|
/// indicates whether the algorithm purposefully left lines exceeding the
|
|
/// column limit.
|
|
///
|
|
/// The returned penalty will cover the cost of the additional line breaks
|
|
/// and column limit violation in all lines except for the last one. The
|
|
/// penalty for the column limit violation in the last line (and in single
|
|
/// line tokens) is handled in \c addNextStateToQueue.
|
|
///
|
|
/// \p Strict indicates whether reflowing is allowed to leave characters
|
|
/// protruding the column limit; if true, lines will be split strictly within
|
|
/// the column limit where possible; if false, words are allowed to protrude
|
|
/// over the column limit as long as the penalty is less than the penalty
|
|
/// of a break.
|
|
std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
|
|
LineState &State,
|
|
bool AllowBreak, bool DryRun,
|
|
bool Strict);
|
|
|
|
/// Returns the \c BreakableToken starting at \p Current, or nullptr
|
|
/// if the current token cannot be broken.
|
|
std::unique_ptr<BreakableToken>
|
|
createBreakableToken(const FormatToken &Current, LineState &State,
|
|
bool AllowBreak);
|
|
|
|
/// Appends the next token to \p State and updates information
|
|
/// necessary for indentation.
|
|
///
|
|
/// Puts the token on the current line.
|
|
///
|
|
/// If \p DryRun is \c false, also creates and stores the required
|
|
/// \c Replacement.
|
|
void addTokenOnCurrentLine(LineState &State, bool DryRun,
|
|
unsigned ExtraSpaces);
|
|
|
|
/// Appends the next token to \p State and updates information
|
|
/// necessary for indentation.
|
|
///
|
|
/// Adds a line break and necessary indentation.
|
|
///
|
|
/// If \p DryRun is \c false, also creates and stores the required
|
|
/// \c Replacement.
|
|
unsigned addTokenOnNewLine(LineState &State, bool DryRun);
|
|
|
|
/// Calculate the new column for a line wrap before the next token.
|
|
unsigned getNewLineColumn(const LineState &State);
|
|
|
|
/// Adds a multiline token to the \p State.
|
|
///
|
|
/// \returns Extra penalty for the first line of the literal: last line is
|
|
/// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
|
|
/// matter, as we don't change them.
|
|
unsigned addMultilineToken(const FormatToken &Current, LineState &State);
|
|
|
|
/// Returns \c true if the next token starts a multiline string
|
|
/// literal.
|
|
///
|
|
/// This includes implicitly concatenated strings, strings that will be broken
|
|
/// by clang-format and string literals with escaped newlines.
|
|
bool nextIsMultilineString(const LineState &State);
|
|
|
|
FormatStyle Style;
|
|
const AdditionalKeywords &Keywords;
|
|
const SourceManager &SourceMgr;
|
|
WhitespaceManager &Whitespaces;
|
|
encoding::Encoding Encoding;
|
|
bool BinPackInconclusiveFunctions;
|
|
llvm::Regex CommentPragmasRegex;
|
|
const RawStringFormatStyleManager RawStringFormats;
|
|
};
|
|
|
|
struct ParenState {
|
|
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
|
|
bool AvoidBinPacking, bool NoLineBreak)
|
|
: Tok(Tok), Indent(Indent), LastSpace(LastSpace),
|
|
NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
|
|
AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
|
|
NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
|
|
LastOperatorWrapped(true), ContainsLineBreak(false),
|
|
ContainsUnwrappedBuilder(false), AlignColons(true),
|
|
ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
|
|
NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
|
|
|
|
/// \brief The token opening this parenthesis level, or nullptr if this level
|
|
/// is opened by fake parenthesis.
|
|
///
|
|
/// Not considered for memoization as it will always have the same value at
|
|
/// the same token.
|
|
const FormatToken *Tok;
|
|
|
|
/// The position to which a specific parenthesis level needs to be
|
|
/// indented.
|
|
unsigned Indent;
|
|
|
|
/// The position of the last space on each level.
|
|
///
|
|
/// Used e.g. to break like:
|
|
/// functionCall(Parameter, otherCall(
|
|
/// OtherParameter));
|
|
unsigned LastSpace;
|
|
|
|
/// If a block relative to this parenthesis level gets wrapped, indent
|
|
/// it this much.
|
|
unsigned NestedBlockIndent;
|
|
|
|
/// The position the first "<<" operator encountered on each level.
|
|
///
|
|
/// Used to align "<<" operators. 0 if no such operator has been encountered
|
|
/// on a level.
|
|
unsigned FirstLessLess = 0;
|
|
|
|
/// The column of a \c ? in a conditional expression;
|
|
unsigned QuestionColumn = 0;
|
|
|
|
/// The position of the colon in an ObjC method declaration/call.
|
|
unsigned ColonPos = 0;
|
|
|
|
/// The start of the most recent function in a builder-type call.
|
|
unsigned StartOfFunctionCall = 0;
|
|
|
|
/// Contains the start of array subscript expressions, so that they
|
|
/// can be aligned.
|
|
unsigned StartOfArraySubscripts = 0;
|
|
|
|
/// If a nested name specifier was broken over multiple lines, this
|
|
/// contains the start column of the second line. Otherwise 0.
|
|
unsigned NestedNameSpecifierContinuation = 0;
|
|
|
|
/// If a call expression was broken over multiple lines, this
|
|
/// contains the start column of the second line. Otherwise 0.
|
|
unsigned CallContinuation = 0;
|
|
|
|
/// The column of the first variable name in a variable declaration.
|
|
///
|
|
/// Used to align further variables if necessary.
|
|
unsigned VariablePos = 0;
|
|
|
|
/// Whether a newline needs to be inserted before the block's closing
|
|
/// brace.
|
|
///
|
|
/// We only want to insert a newline before the closing brace if there also
|
|
/// was a newline after the beginning left brace.
|
|
bool BreakBeforeClosingBrace : 1;
|
|
|
|
/// Avoid bin packing, i.e. multiple parameters/elements on multiple
|
|
/// lines, in this context.
|
|
bool AvoidBinPacking : 1;
|
|
|
|
/// Break after the next comma (or all the commas in this context if
|
|
/// \c AvoidBinPacking is \c true).
|
|
bool BreakBeforeParameter : 1;
|
|
|
|
/// Line breaking in this context would break a formatting rule.
|
|
bool NoLineBreak : 1;
|
|
|
|
/// Same as \c NoLineBreak, but is restricted until the end of the
|
|
/// operand (including the next ",").
|
|
bool NoLineBreakInOperand : 1;
|
|
|
|
/// True if the last binary operator on this level was wrapped to the
|
|
/// next line.
|
|
bool LastOperatorWrapped : 1;
|
|
|
|
/// \c true if this \c ParenState already contains a line-break.
|
|
///
|
|
/// The first line break in a certain \c ParenState causes extra penalty so
|
|
/// that clang-format prefers similar breaks, i.e. breaks in the same
|
|
/// parenthesis.
|
|
bool ContainsLineBreak : 1;
|
|
|
|
/// \c true if this \c ParenState contains multiple segments of a
|
|
/// builder-type call on one line.
|
|
bool ContainsUnwrappedBuilder : 1;
|
|
|
|
/// \c true if the colons of the curren ObjC method expression should
|
|
/// be aligned.
|
|
///
|
|
/// Not considered for memoization as it will always have the same value at
|
|
/// the same token.
|
|
bool AlignColons : 1;
|
|
|
|
/// \c true if at least one selector name was found in the current
|
|
/// ObjC method expression.
|
|
///
|
|
/// Not considered for memoization as it will always have the same value at
|
|
/// the same token.
|
|
bool ObjCSelectorNameFound : 1;
|
|
|
|
/// \c true if there are multiple nested blocks inside these parens.
|
|
///
|
|
/// Not considered for memoization as it will always have the same value at
|
|
/// the same token.
|
|
bool HasMultipleNestedBlocks : 1;
|
|
|
|
/// The start of a nested block (e.g. lambda introducer in C++ or
|
|
/// "function" in JavaScript) is not wrapped to a new line.
|
|
bool NestedBlockInlined : 1;
|
|
|
|
/// \c true if the current \c ParenState represents an Objective-C
|
|
/// array literal.
|
|
bool IsInsideObjCArrayLiteral : 1;
|
|
|
|
bool operator<(const ParenState &Other) const {
|
|
if (Indent != Other.Indent)
|
|
return Indent < Other.Indent;
|
|
if (LastSpace != Other.LastSpace)
|
|
return LastSpace < Other.LastSpace;
|
|
if (NestedBlockIndent != Other.NestedBlockIndent)
|
|
return NestedBlockIndent < Other.NestedBlockIndent;
|
|
if (FirstLessLess != Other.FirstLessLess)
|
|
return FirstLessLess < Other.FirstLessLess;
|
|
if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
|
|
return BreakBeforeClosingBrace;
|
|
if (QuestionColumn != Other.QuestionColumn)
|
|
return QuestionColumn < Other.QuestionColumn;
|
|
if (AvoidBinPacking != Other.AvoidBinPacking)
|
|
return AvoidBinPacking;
|
|
if (BreakBeforeParameter != Other.BreakBeforeParameter)
|
|
return BreakBeforeParameter;
|
|
if (NoLineBreak != Other.NoLineBreak)
|
|
return NoLineBreak;
|
|
if (LastOperatorWrapped != Other.LastOperatorWrapped)
|
|
return LastOperatorWrapped;
|
|
if (ColonPos != Other.ColonPos)
|
|
return ColonPos < Other.ColonPos;
|
|
if (StartOfFunctionCall != Other.StartOfFunctionCall)
|
|
return StartOfFunctionCall < Other.StartOfFunctionCall;
|
|
if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
|
|
return StartOfArraySubscripts < Other.StartOfArraySubscripts;
|
|
if (CallContinuation != Other.CallContinuation)
|
|
return CallContinuation < Other.CallContinuation;
|
|
if (VariablePos != Other.VariablePos)
|
|
return VariablePos < Other.VariablePos;
|
|
if (ContainsLineBreak != Other.ContainsLineBreak)
|
|
return ContainsLineBreak;
|
|
if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
|
|
return ContainsUnwrappedBuilder;
|
|
if (NestedBlockInlined != Other.NestedBlockInlined)
|
|
return NestedBlockInlined;
|
|
return false;
|
|
}
|
|
};
|
|
|
|
/// The current state when indenting a unwrapped line.
|
|
///
|
|
/// As the indenting tries different combinations this is copied by value.
|
|
struct LineState {
|
|
/// The number of used columns in the current line.
|
|
unsigned Column;
|
|
|
|
/// The token that needs to be next formatted.
|
|
FormatToken *NextToken;
|
|
|
|
/// \c true if this line contains a continued for-loop section.
|
|
bool LineContainsContinuedForLoopSection;
|
|
|
|
/// \c true if \p NextToken should not continue this line.
|
|
bool NoContinuation;
|
|
|
|
/// The \c NestingLevel at the start of this line.
|
|
unsigned StartOfLineLevel;
|
|
|
|
/// The lowest \c NestingLevel on the current line.
|
|
unsigned LowestLevelOnLine;
|
|
|
|
/// The start column of the string literal, if we're in a string
|
|
/// literal sequence, 0 otherwise.
|
|
unsigned StartOfStringLiteral;
|
|
|
|
/// A stack keeping track of properties applying to parenthesis
|
|
/// levels.
|
|
std::vector<ParenState> Stack;
|
|
|
|
/// Ignore the stack of \c ParenStates for state comparison.
|
|
///
|
|
/// In long and deeply nested unwrapped lines, the current algorithm can
|
|
/// be insufficient for finding the best formatting with a reasonable amount
|
|
/// of time and memory. Setting this flag will effectively lead to the
|
|
/// algorithm not analyzing some combinations. However, these combinations
|
|
/// rarely contain the optimal solution: In short, accepting a higher
|
|
/// penalty early would need to lead to different values in the \c
|
|
/// ParenState stack (in an otherwise identical state) and these different
|
|
/// values would need to lead to a significant amount of avoided penalty
|
|
/// later.
|
|
///
|
|
/// FIXME: Come up with a better algorithm instead.
|
|
bool IgnoreStackForComparison;
|
|
|
|
/// The indent of the first token.
|
|
unsigned FirstIndent;
|
|
|
|
/// The line that is being formatted.
|
|
///
|
|
/// Does not need to be considered for memoization because it doesn't change.
|
|
const AnnotatedLine *Line;
|
|
|
|
/// Comparison operator to be able to used \c LineState in \c map.
|
|
bool operator<(const LineState &Other) const {
|
|
if (NextToken != Other.NextToken)
|
|
return NextToken < Other.NextToken;
|
|
if (Column != Other.Column)
|
|
return Column < Other.Column;
|
|
if (LineContainsContinuedForLoopSection !=
|
|
Other.LineContainsContinuedForLoopSection)
|
|
return LineContainsContinuedForLoopSection;
|
|
if (NoContinuation != Other.NoContinuation)
|
|
return NoContinuation;
|
|
if (StartOfLineLevel != Other.StartOfLineLevel)
|
|
return StartOfLineLevel < Other.StartOfLineLevel;
|
|
if (LowestLevelOnLine != Other.LowestLevelOnLine)
|
|
return LowestLevelOnLine < Other.LowestLevelOnLine;
|
|
if (StartOfStringLiteral != Other.StartOfStringLiteral)
|
|
return StartOfStringLiteral < Other.StartOfStringLiteral;
|
|
if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
|
|
return false;
|
|
return Stack < Other.Stack;
|
|
}
|
|
};
|
|
|
|
} // end namespace format
|
|
} // end namespace clang
|
|
|
|
#endif
|