Refactor the data structures used in clang-format.

The main difference is the addition of an AnnotatedLine class that holds
information about a line while it is being formatted. At the same time,
reduce the UnwrappedLine class to one used solely for communication between
the UnwrappedLineParser and the Formatter.

No functional changes intended.

llvm-svn: 172403
This commit is contained in:
Daniel Jasper 2013-01-14 13:08:07 +00:00
parent 13f23e17c7
commit f1e4b7d750
1 changed file with 137 additions and 122 deletions

View File

@ -93,6 +93,19 @@ public:
AnnotatedToken *Parent; AnnotatedToken *Parent;
}; };
class AnnotatedLine {
public:
AnnotatedLine(const FormatToken &FormatTok, unsigned Level,
bool InPPDirective)
: First(FormatTok), Level(Level), InPPDirective(InPPDirective) {}
AnnotatedToken First;
AnnotatedToken *Last;
LineType Type;
unsigned Level;
bool InPPDirective;
};
static prec::Level getPrecedence(const AnnotatedToken &Tok) { static prec::Level getPrecedence(const AnnotatedToken &Tok) {
return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
} }
@ -168,11 +181,13 @@ static void replacePPWhitespace(
NewLineText + std::string(Spaces, ' '))); NewLineText + std::string(Spaces, ' ')));
} }
/// \brief Checks whether the (remaining) \c UnwrappedLine starting with /// \brief Calculates whether the (remaining) \c AnnotatedLine starting with
/// \p RootToken fits into \p Limit columns. /// \p RootToken fits into \p Limit columns on a single line.
static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) { ///
/// If true, sets \p Length to the required length.
static bool fitsIntoLimit(const AnnotatedToken &RootToken,
unsigned Limit, unsigned* Length = 0) {
unsigned Columns = RootToken.FormatTok.TokenLength; unsigned Columns = RootToken.FormatTok.TokenLength;
bool FitsOnALine = true;
const AnnotatedToken *Tok = &RootToken; const AnnotatedToken *Tok = &RootToken;
while (!Tok->Children.empty()) { while (!Tok->Children.empty()) {
Tok = &Tok->Children[0]; Tok = &Tok->Children[0];
@ -181,11 +196,13 @@ static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) {
// needs to be put on a new line if the line needs to be split. // needs to be put on a new line if the line needs to be split.
if (Columns > Limit || if (Columns > Limit ||
(Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) { (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) {
FitsOnALine = false; // FIXME: Remove this hack.
break; return false;
} }
} }
return FitsOnALine; if (Length != 0)
*Length = Columns;
return true;
} }
/// \brief Returns if a token is an Objective-C selector name. /// \brief Returns if a token is an Objective-C selector name.
@ -200,7 +217,7 @@ static bool isObjCSelectorName(const AnnotatedToken &Tok) {
class UnwrappedLineFormatter { class UnwrappedLineFormatter {
public: public:
UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
const UnwrappedLine &Line, unsigned FirstIndent, const AnnotatedLine &Line, unsigned FirstIndent,
bool FitsOnALine, const AnnotatedToken &RootToken, bool FitsOnALine, const AnnotatedToken &RootToken,
tooling::Replacements &Replaces, bool StructuralError) tooling::Replacements &Replaces, bool StructuralError)
: Style(Style), SourceMgr(SourceMgr), Line(Line), : Style(Style), SourceMgr(SourceMgr), Line(Line),
@ -599,7 +616,7 @@ private:
FormatStyle Style; FormatStyle Style;
SourceManager &SourceMgr; SourceManager &SourceMgr;
const UnwrappedLine &Line; const AnnotatedLine &Line;
const unsigned FirstIndent; const unsigned FirstIndent;
const bool FitsOnALine; const bool FitsOnALine;
const AnnotatedToken &RootToken; const AnnotatedToken &RootToken;
@ -616,10 +633,9 @@ private:
/// \c UnwrappedLine. /// \c UnwrappedLine.
class TokenAnnotator { class TokenAnnotator {
public: public:
TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
SourceManager &SourceMgr, Lexer &Lex) AnnotatedLine &Line)
: Style(Style), SourceMgr(SourceMgr), Lex(Lex), : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
RootToken(Line.RootToken) {}
/// \brief A parser that gathers additional information about tokens. /// \brief A parser that gathers additional information about tokens.
/// ///
@ -888,7 +904,9 @@ public:
}; };
void createAnnotatedTokens(AnnotatedToken &Current) { void createAnnotatedTokens(AnnotatedToken &Current) {
if (!Current.FormatTok.Children.empty()) { if (Current.FormatTok.Children.empty()) {
Line.Last = &Current;
} else {
Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0])); Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0]));
Current.Children.back().Parent = &Current; Current.Children.back().Parent = &Current;
createAnnotatedTokens(Current.Children.back()); createAnnotatedTokens(Current.Children.back());
@ -917,34 +935,30 @@ public:
calculateExtraInformation(Current.Children[0]); calculateExtraInformation(Current.Children[0]);
} }
bool annotate() { void annotate() {
createAnnotatedTokens(RootToken); Line.Last = &Line.First;
createAnnotatedTokens(Line.First);
AnnotatingParser Parser(RootToken); AnnotatingParser Parser(Line.First);
CurrentLineType = Parser.parseLine(); Line.Type = Parser.parseLine();
if (CurrentLineType == LT_Invalid) if (Line.Type == LT_Invalid)
return false; return;
determineTokenTypes(RootToken, /*IsRHS=*/false); determineTokenTypes(Line.First, /*IsRHS=*/false);
if (RootToken.Type == TT_ObjCMethodSpecifier) if (Line.First.Type == TT_ObjCMethodSpecifier)
CurrentLineType = LT_ObjCMethodDecl; Line.Type = LT_ObjCMethodDecl;
else if (RootToken.Type == TT_ObjCDecl) else if (Line.First.Type == TT_ObjCDecl)
CurrentLineType = LT_ObjCDecl; Line.Type = LT_ObjCDecl;
else if (RootToken.Type == TT_ObjCProperty) else if (Line.First.Type == TT_ObjCProperty)
CurrentLineType = LT_ObjCProperty; Line.Type = LT_ObjCProperty;
if (!RootToken.Children.empty()) Line.First.SpaceRequiredBefore = true;
calculateExtraInformation(RootToken.Children[0]); Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
return true; Line.First.CanBreakBefore = Line.First.MustBreakBefore;
}
LineType getLineType() { if (!Line.First.Children.empty())
return CurrentLineType; calculateExtraInformation(Line.First.Children[0]);
}
const AnnotatedToken &getRootToken() {
return RootToken;
} }
private: private:
@ -1079,7 +1093,7 @@ private:
return false; return false;
if (Right.is(tok::less) && if (Right.is(tok::less) &&
(Left.is(tok::kw_template) || (Left.is(tok::kw_template) ||
(CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
return true; return true;
if (Left.is(tok::arrow) || Right.is(tok::arrow)) if (Left.is(tok::arrow) || Right.is(tok::arrow))
return false; return false;
@ -1119,7 +1133,7 @@ private:
if (Left.is(tok::l_paren)) if (Left.is(tok::l_paren))
return false; return false;
if (Right.is(tok::l_paren)) { if (Right.is(tok::l_paren)) {
return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) || return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
Left.is(tok::kw_for) || Left.is(tok::kw_while) || Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
Left.is(tok::kw_switch) || Left.is(tok::kw_return) || Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
Left.is(tok::kw_catch) || Left.is(tok::kw_new) || Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
@ -1134,7 +1148,7 @@ private:
} }
bool spaceRequiredBefore(const AnnotatedToken &Tok) { bool spaceRequiredBefore(const AnnotatedToken &Tok) {
if (CurrentLineType == LT_ObjCMethodDecl) { if (Line.Type == LT_ObjCMethodDecl) {
if (Tok.is(tok::identifier) && !Tok.Children.empty() && if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
return true; return true;
@ -1151,7 +1165,7 @@ private:
// Don't space between ':' and '(' // Don't space between ':' and '('
return false; return false;
} }
if (CurrentLineType == LT_ObjCProperty && if (Line.Type == LT_ObjCProperty &&
(Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
return false; return false;
@ -1167,7 +1181,7 @@ private:
if (Tok.Parent->Type == TT_OverloadedOperator) if (Tok.Parent->Type == TT_OverloadedOperator)
return false; return false;
if (Tok.is(tok::colon)) if (Tok.is(tok::colon))
return RootToken.isNot(tok::kw_case) && !Tok.Children.empty() && return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
Tok.Type != TT_ObjCMethodExpr; Tok.Type != TT_ObjCMethodExpr;
if (Tok.Parent->Type == TT_UnaryOperator || if (Tok.Parent->Type == TT_UnaryOperator ||
Tok.Parent->Type == TT_CastRParen) Tok.Parent->Type == TT_CastRParen)
@ -1185,7 +1199,7 @@ private:
return true; return true;
if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
return false; return false;
if (Tok.is(tok::less) && RootToken.is(tok::hash)) if (Tok.is(tok::less) && Line.First.is(tok::hash))
return true; return true;
if (Tok.Type == TT_TrailingUnaryOperator) if (Tok.Type == TT_TrailingUnaryOperator)
return false; return false;
@ -1194,7 +1208,7 @@ private:
bool canBreakBefore(const AnnotatedToken &Right) { bool canBreakBefore(const AnnotatedToken &Right) {
const AnnotatedToken &Left = *Right.Parent; const AnnotatedToken &Left = *Right.Parent;
if (CurrentLineType == LT_ObjCMethodDecl) { if (Line.Type == LT_ObjCMethodDecl) {
if (Right.is(tok::identifier) && !Right.Children.empty() && if (Right.is(tok::identifier) && !Right.Children.empty() &&
Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
return true; return true;
@ -1221,7 +1235,7 @@ private:
if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr) Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
return false; return false;
if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl) if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
return false; return false;
if (Right.is(tok::comment)) if (Right.is(tok::comment))
@ -1246,8 +1260,7 @@ private:
FormatStyle Style; FormatStyle Style;
SourceManager &SourceMgr; SourceManager &SourceMgr;
Lexer &Lex; Lexer &Lex;
LineType CurrentLineType; AnnotatedLine &Line;
AnnotatedToken RootToken;
}; };
class LexerBasedFormatTokenSource : public FormatTokenSource { class LexerBasedFormatTokenSource : public FormatTokenSource {
@ -1347,35 +1360,32 @@ public:
UnwrappedLineParser Parser(Diag, Style, Tokens, *this); UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
StructuralError = Parser.parse(); StructuralError = Parser.parse();
unsigned PreviousEndOfLineColumn = 0; unsigned PreviousEndOfLineColumn = 0;
for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(), for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
E = UnwrappedLines.end(); TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
Annotator.annotate();
}
for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
E = AnnotatedLines.end();
I != E; ++I) { I != E; ++I) {
const UnwrappedLine &TheLine = *I; const AnnotatedLine &TheLine = *I;
if (touchesRanges(TheLine)) { if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
OwningPtr<TokenAnnotator> AnnotatedLine( unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
new TokenAnnotator(TheLine, Style, SourceMgr, Lex)); TheLine.InPPDirective,
if (!AnnotatedLine->annotate())
break;
unsigned Indent = formatFirstToken(AnnotatedLine->getRootToken(),
TheLine.Level, TheLine.InPPDirective,
PreviousEndOfLineColumn); PreviousEndOfLineColumn);
bool FitsOnALine = tryFitMultipleLinesInOne(Indent, I, E);
UnwrappedLine Line(TheLine); UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
bool FitsOnALine = tryFitMultipleLinesInOne(Indent, Line, AnnotatedLine, FitsOnALine, TheLine.First, Replaces,
I, E); StructuralError);
UnwrappedLineFormatter Formatter(
Style, SourceMgr, Line, Indent, FitsOnALine,
AnnotatedLine->getRootToken(), Replaces, StructuralError);
PreviousEndOfLineColumn = Formatter.format(); PreviousEndOfLineColumn = Formatter.format();
} else { } else {
// If we did not reformat this unwrapped line, the column at the end of // If we did not reformat this unwrapped line, the column at the end of
// the last token is unchanged - thus, we can calculate the end of the // the last token is unchanged - thus, we can calculate the end of the
// last token, and return the result. // last token, and return the result.
const FormatToken *Last = getLastInLine(TheLine);
PreviousEndOfLineColumn = PreviousEndOfLineColumn =
SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) + SourceMgr.getSpellingColumnNumber(
Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr, TheLine.Last->FormatTok.Tok.getLocation()) +
Lex.getLangOpts()) - Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
SourceMgr, Lex.getLangOpts()) -
1; 1;
} }
} }
@ -1389,81 +1399,85 @@ private:
/// if possible; note that \c I will be incremented when lines are merged. /// if possible; note that \c I will be incremented when lines are merged.
/// ///
/// Returns whether the resulting \c Line can fit in a single line. /// Returns whether the resulting \c Line can fit in a single line.
bool tryFitMultipleLinesInOne(unsigned Indent, UnwrappedLine &Line, bool tryFitMultipleLinesInOne(unsigned Indent,
OwningPtr<TokenAnnotator> &AnnotatedLine, std::vector<AnnotatedLine>::iterator &I,
std::vector<UnwrappedLine>::iterator &I, std::vector<AnnotatedLine>::iterator E) {
std::vector<UnwrappedLine>::iterator E) {
unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent; unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
// Check whether the UnwrappedLine can be put onto a single line. If // Check whether the UnwrappedLine can be put onto a single line. If
// so, this is bound to be the optimal solution (by definition) and we // so, this is bound to be the optimal solution (by definition) and we
// don't need to analyze the entire solution space. // don't need to analyze the entire solution space.
bool FitsOnALine = fitsIntoLimit(AnnotatedLine->getRootToken(), Limit); unsigned LengthLine1 = 0;
if (!FitsOnALine || I + 1 == E || I + 2 == E) if (!fitsIntoLimit(I->First, Limit, &LengthLine1))
return FitsOnALine; return false;
// Try to merge the next two lines if possible. // Check that we still have three lines and they fit into the limit.
UnwrappedLine Combined(Line); if (I + 1 == E || I + 2 == E)
return true;
unsigned LengthLine2 = 0;
unsigned LengthLine3 = 0;
if (!fitsIntoLimit((I + 1)->First, Limit, &LengthLine2) ||
!fitsIntoLimit((I + 2)->First, Limit, &LengthLine3))
return true;
if (LengthLine1 + LengthLine2 + LengthLine3 + 2 > Limit) // Two spaces.
return true;
// First, check that the current line allows merging. This is the case if // First, check that the current line allows merging. This is the case if
// we're not in a control flow statement and the last token is an opening // we're not in a control flow statement and the last token is an opening
// brace. // brace.
FormatToken *Last = &Combined.RootToken; AnnotatedLine& Line = *I;
bool AllowedTokens = bool AllowedTokens =
Last->Tok.isNot(tok::kw_if) && Last->Tok.isNot(tok::kw_while) && Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
Last->Tok.isNot(tok::kw_do) && Last->Tok.isNot(tok::r_brace) && Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
Last->Tok.isNot(tok::kw_else) && Last->Tok.isNot(tok::kw_try) && Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
Last->Tok.isNot(tok::kw_catch) && Last->Tok.isNot(tok::kw_for) && Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
// This gets rid of all ObjC @ keywords and methods. // This gets rid of all ObjC @ keywords and methods.
Last->Tok.isNot(tok::at) && Last->Tok.isNot(tok::minus) && Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
Last->Tok.isNot(tok::plus); Line.First.isNot(tok::plus);
while (!Last->Children.empty()) if (Line.Last->isNot(tok::l_brace) || !AllowedTokens)
Last = &Last->Children.back(); return true;
if (!Last->Tok.is(tok::l_brace))
return FitsOnALine;
// Second, check that the next line does not contain any braces - if it // Second, check that the next line does not contain any braces - if it
// does, readability declines when putting it into a single line. // does, readability declines when putting it into a single line.
const FormatToken *Next = &(I + 1)->RootToken; const AnnotatedToken *Tok = &(I + 1)->First;
while (Next) { if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
AllowedTokens = AllowedTokens && !Next->Tok.is(tok::l_brace) && return true;
!Next->Tok.is(tok::r_brace); do {
Last->Children.push_back(*Next); if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
Last = &Last->Children[0]; return true;
Last->Children.clear(); Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
Next = Next->Children.empty() ? NULL : &Next->Children.back(); } while (Tok != NULL);
}
// Last, check that the third line contains a single closing brace. // Last, check that the third line contains a single closing brace.
Next = &(I + 2)->RootToken; Tok = &(I + 2)->First;
AllowedTokens = AllowedTokens && Next->Tok.is(tok::r_brace); if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
if (!Next->Children.empty() || !AllowedTokens) Tok->MustBreakBefore)
return FitsOnALine; return true;
Last->Children.push_back(*Next);
OwningPtr<TokenAnnotator> CombinedAnnotator( // If the merged line fits, we use that instead and skip the next two lines.
new TokenAnnotator(Combined, Style, SourceMgr, Lex)); Line.Last->Children.push_back((I + 1)->First);
if (CombinedAnnotator->annotate() && while (!Line.Last->Children.empty()) {
fitsIntoLimit(CombinedAnnotator->getRootToken(), Limit)) { Line.Last->Children[0].Parent = Line.Last;
// If the merged line fits, we use that instead and skip the next two Line.Last = &Line.Last->Children[0];
// lines.
AnnotatedLine.reset(CombinedAnnotator.take());
Line = Combined;
I += 2;
} }
return FitsOnALine;
join(Line, *(I + 1));
join(Line, *(I + 2));
I += 2;
return true;
} }
const FormatToken *getLastInLine(const UnwrappedLine &TheLine) { void join(AnnotatedLine &A, const AnnotatedLine &B) {
const FormatToken *Last = &TheLine.RootToken; A.Last->Children.push_back(B.First);
while (!Last->Children.empty()) while (!A.Last->Children.empty()) {
Last = &Last->Children.back(); A.Last->Children[0].Parent = A.Last;
return Last; A.Last = &A.Last->Children[0];
}
} }
bool touchesRanges(const UnwrappedLine &TheLine) { bool touchesRanges(const AnnotatedLine &TheLine) {
const FormatToken *First = &TheLine.RootToken; const FormatToken *First = &TheLine.First.FormatTok;
const FormatToken *Last = getLastInLine(TheLine); const FormatToken *Last = &TheLine.Last->FormatTok;
CharSourceRange LineRange = CharSourceRange::getTokenRange( CharSourceRange LineRange = CharSourceRange::getTokenRange(
First->Tok.getLocation(), First->Tok.getLocation(),
Last->Tok.getLocation()); Last->Tok.getLocation());
@ -1478,7 +1492,8 @@ private:
} }
virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
UnwrappedLines.push_back(TheLine); AnnotatedLines.push_back(
AnnotatedLine(TheLine.RootToken, TheLine.Level, TheLine.InPPDirective));
} }
/// \brief Add a new line and the required indent before the first Token /// \brief Add a new line and the required indent before the first Token
@ -1526,7 +1541,7 @@ private:
SourceManager &SourceMgr; SourceManager &SourceMgr;
tooling::Replacements Replaces; tooling::Replacements Replaces;
std::vector<CharSourceRange> Ranges; std::vector<CharSourceRange> Ranges;
std::vector<UnwrappedLine> UnwrappedLines; std::vector<AnnotatedLine> AnnotatedLines;
bool StructuralError; bool StructuralError;
}; };