Refactor datastructure used in clang-format.

Main difference, add an AnnotatedLine class to hold information about a
line while formatting. At the same time degrade the UnwrappedLine class
to a class solely used for communicating between the UnwrappedLineParser
and the Formatter.

No functional changes intended.

llvm-svn: 172403
This commit is contained in:
Daniel Jasper 2013-01-14 13:08:07 +00:00
parent 13f23e17c7
commit f1e4b7d750
1 changed files with 137 additions and 122 deletions

View File

@ -93,6 +93,19 @@ public:
AnnotatedToken *Parent;
};
class AnnotatedLine {
public:
AnnotatedLine(const FormatToken &FormatTok, unsigned Level,
bool InPPDirective)
: First(FormatTok), Level(Level), InPPDirective(InPPDirective) {}
AnnotatedToken First;
AnnotatedToken *Last;
LineType Type;
unsigned Level;
bool InPPDirective;
};
static prec::Level getPrecedence(const AnnotatedToken &Tok) {
return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
}
@ -168,11 +181,13 @@ static void replacePPWhitespace(
NewLineText + std::string(Spaces, ' ')));
}
/// \brief Checks whether the (remaining) \c UnwrappedLine starting with
/// \p RootToken fits into \p Limit columns.
static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) {
/// \brief Calculates whether the (remaining) \c AnnotatedLine starting with
/// \p RootToken fits into \p Limit columns on a single line.
///
/// If true, sets \p Length to the required length.
static bool fitsIntoLimit(const AnnotatedToken &RootToken,
unsigned Limit, unsigned* Length = 0) {
unsigned Columns = RootToken.FormatTok.TokenLength;
bool FitsOnALine = true;
const AnnotatedToken *Tok = &RootToken;
while (!Tok->Children.empty()) {
Tok = &Tok->Children[0];
@ -181,11 +196,13 @@ static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) {
// needs to be put on a new line if the line needs to be split.
if (Columns > Limit ||
(Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) {
FitsOnALine = false;
break;
// FIXME: Remove this hack.
return false;
}
}
return FitsOnALine;
if (Length != 0)
*Length = Columns;
return true;
}
/// \brief Returns if a token is an Objective-C selector name.
@ -200,7 +217,7 @@ static bool isObjCSelectorName(const AnnotatedToken &Tok) {
class UnwrappedLineFormatter {
public:
UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
const UnwrappedLine &Line, unsigned FirstIndent,
const AnnotatedLine &Line, unsigned FirstIndent,
bool FitsOnALine, const AnnotatedToken &RootToken,
tooling::Replacements &Replaces, bool StructuralError)
: Style(Style), SourceMgr(SourceMgr), Line(Line),
@ -599,7 +616,7 @@ private:
FormatStyle Style;
SourceManager &SourceMgr;
const UnwrappedLine &Line;
const AnnotatedLine &Line;
const unsigned FirstIndent;
const bool FitsOnALine;
const AnnotatedToken &RootToken;
@ -616,10 +633,9 @@ private:
/// \c UnwrappedLine.
class TokenAnnotator {
public:
TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style,
SourceManager &SourceMgr, Lexer &Lex)
: Style(Style), SourceMgr(SourceMgr), Lex(Lex),
RootToken(Line.RootToken) {}
TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
AnnotatedLine &Line)
: Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
/// \brief A parser that gathers additional information about tokens.
///
@ -888,7 +904,9 @@ public:
};
void createAnnotatedTokens(AnnotatedToken &Current) {
if (!Current.FormatTok.Children.empty()) {
if (Current.FormatTok.Children.empty()) {
Line.Last = &Current;
} else {
Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0]));
Current.Children.back().Parent = &Current;
createAnnotatedTokens(Current.Children.back());
@ -917,34 +935,30 @@ public:
calculateExtraInformation(Current.Children[0]);
}
bool annotate() {
createAnnotatedTokens(RootToken);
void annotate() {
Line.Last = &Line.First;
createAnnotatedTokens(Line.First);
AnnotatingParser Parser(RootToken);
CurrentLineType = Parser.parseLine();
if (CurrentLineType == LT_Invalid)
return false;
AnnotatingParser Parser(Line.First);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
determineTokenTypes(RootToken, /*IsRHS=*/false);
determineTokenTypes(Line.First, /*IsRHS=*/false);
if (RootToken.Type == TT_ObjCMethodSpecifier)
CurrentLineType = LT_ObjCMethodDecl;
else if (RootToken.Type == TT_ObjCDecl)
CurrentLineType = LT_ObjCDecl;
else if (RootToken.Type == TT_ObjCProperty)
CurrentLineType = LT_ObjCProperty;
if (Line.First.Type == TT_ObjCMethodSpecifier)
Line.Type = LT_ObjCMethodDecl;
else if (Line.First.Type == TT_ObjCDecl)
Line.Type = LT_ObjCDecl;
else if (Line.First.Type == TT_ObjCProperty)
Line.Type = LT_ObjCProperty;
if (!RootToken.Children.empty())
calculateExtraInformation(RootToken.Children[0]);
return true;
}
Line.First.SpaceRequiredBefore = true;
Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
Line.First.CanBreakBefore = Line.First.MustBreakBefore;
LineType getLineType() {
return CurrentLineType;
}
const AnnotatedToken &getRootToken() {
return RootToken;
if (!Line.First.Children.empty())
calculateExtraInformation(Line.First.Children[0]);
}
private:
@ -1079,7 +1093,7 @@ private:
return false;
if (Right.is(tok::less) &&
(Left.is(tok::kw_template) ||
(CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
(Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
return true;
if (Left.is(tok::arrow) || Right.is(tok::arrow))
return false;
@ -1119,7 +1133,7 @@ private:
if (Left.is(tok::l_paren))
return false;
if (Right.is(tok::l_paren)) {
return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) ||
return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
@ -1134,7 +1148,7 @@ private:
}
bool spaceRequiredBefore(const AnnotatedToken &Tok) {
if (CurrentLineType == LT_ObjCMethodDecl) {
if (Line.Type == LT_ObjCMethodDecl) {
if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
return true;
@ -1151,7 +1165,7 @@ private:
// Don't space between ':' and '('
return false;
}
if (CurrentLineType == LT_ObjCProperty &&
if (Line.Type == LT_ObjCProperty &&
(Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
return false;
@ -1167,7 +1181,7 @@ private:
if (Tok.Parent->Type == TT_OverloadedOperator)
return false;
if (Tok.is(tok::colon))
return RootToken.isNot(tok::kw_case) && !Tok.Children.empty() &&
return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
Tok.Type != TT_ObjCMethodExpr;
if (Tok.Parent->Type == TT_UnaryOperator ||
Tok.Parent->Type == TT_CastRParen)
@ -1185,7 +1199,7 @@ private:
return true;
if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
return false;
if (Tok.is(tok::less) && RootToken.is(tok::hash))
if (Tok.is(tok::less) && Line.First.is(tok::hash))
return true;
if (Tok.Type == TT_TrailingUnaryOperator)
return false;
@ -1194,7 +1208,7 @@ private:
bool canBreakBefore(const AnnotatedToken &Right) {
const AnnotatedToken &Left = *Right.Parent;
if (CurrentLineType == LT_ObjCMethodDecl) {
if (Line.Type == LT_ObjCMethodDecl) {
if (Right.is(tok::identifier) && !Right.Children.empty() &&
Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
return true;
@ -1221,7 +1235,7 @@ private:
if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
return false;
if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl)
if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
return false;
if (Right.is(tok::comment))
@ -1246,8 +1260,7 @@ private:
FormatStyle Style;
SourceManager &SourceMgr;
Lexer &Lex;
LineType CurrentLineType;
AnnotatedToken RootToken;
AnnotatedLine &Line;
};
class LexerBasedFormatTokenSource : public FormatTokenSource {
@ -1347,35 +1360,32 @@ public:
UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
StructuralError = Parser.parse();
unsigned PreviousEndOfLineColumn = 0;
for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(),
E = UnwrappedLines.end();
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
Annotator.annotate();
}
for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
E = AnnotatedLines.end();
I != E; ++I) {
const UnwrappedLine &TheLine = *I;
if (touchesRanges(TheLine)) {
OwningPtr<TokenAnnotator> AnnotatedLine(
new TokenAnnotator(TheLine, Style, SourceMgr, Lex));
if (!AnnotatedLine->annotate())
break;
unsigned Indent = formatFirstToken(AnnotatedLine->getRootToken(),
TheLine.Level, TheLine.InPPDirective,
const AnnotatedLine &TheLine = *I;
if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
TheLine.InPPDirective,
PreviousEndOfLineColumn);
UnwrappedLine Line(TheLine);
bool FitsOnALine = tryFitMultipleLinesInOne(Indent, Line, AnnotatedLine,
I, E);
UnwrappedLineFormatter Formatter(
Style, SourceMgr, Line, Indent, FitsOnALine,
AnnotatedLine->getRootToken(), Replaces, StructuralError);
bool FitsOnALine = tryFitMultipleLinesInOne(Indent, I, E);
UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
FitsOnALine, TheLine.First, Replaces,
StructuralError);
PreviousEndOfLineColumn = Formatter.format();
} else {
// If we did not reformat this unwrapped line, the column at the end of
// the last token is unchanged - thus, we can calculate the end of the
// last token, and return the result.
const FormatToken *Last = getLastInLine(TheLine);
PreviousEndOfLineColumn =
SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) +
Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr,
Lex.getLangOpts()) -
SourceMgr.getSpellingColumnNumber(
TheLine.Last->FormatTok.Tok.getLocation()) +
Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
SourceMgr, Lex.getLangOpts()) -
1;
}
}
@ -1389,81 +1399,85 @@ private:
/// if possible; note that \c I will be incremented when lines are merged.
///
/// Returns whether the resulting \c Line can fit in a single line.
bool tryFitMultipleLinesInOne(unsigned Indent, UnwrappedLine &Line,
OwningPtr<TokenAnnotator> &AnnotatedLine,
std::vector<UnwrappedLine>::iterator &I,
std::vector<UnwrappedLine>::iterator E) {
bool tryFitMultipleLinesInOne(unsigned Indent,
std::vector<AnnotatedLine>::iterator &I,
std::vector<AnnotatedLine>::iterator E) {
unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
// Check whether the UnwrappedLine can be put onto a single line. If
// so, this is bound to be the optimal solution (by definition) and we
// don't need to analyze the entire solution space.
bool FitsOnALine = fitsIntoLimit(AnnotatedLine->getRootToken(), Limit);
if (!FitsOnALine || I + 1 == E || I + 2 == E)
return FitsOnALine;
unsigned LengthLine1 = 0;
if (!fitsIntoLimit(I->First, Limit, &LengthLine1))
return false;
// Try to merge the next two lines if possible.
UnwrappedLine Combined(Line);
// Check that we still have three lines and they fit into the limit.
if (I + 1 == E || I + 2 == E)
return true;
unsigned LengthLine2 = 0;
unsigned LengthLine3 = 0;
if (!fitsIntoLimit((I + 1)->First, Limit, &LengthLine2) ||
!fitsIntoLimit((I + 2)->First, Limit, &LengthLine3))
return true;
if (LengthLine1 + LengthLine2 + LengthLine3 + 2 > Limit) // Two spaces.
return true;
// First, check that the current line allows merging. This is the case if
// we're not in a control flow statement and the last token is an opening
// brace.
FormatToken *Last = &Combined.RootToken;
AnnotatedLine& Line = *I;
bool AllowedTokens =
Last->Tok.isNot(tok::kw_if) && Last->Tok.isNot(tok::kw_while) &&
Last->Tok.isNot(tok::kw_do) && Last->Tok.isNot(tok::r_brace) &&
Last->Tok.isNot(tok::kw_else) && Last->Tok.isNot(tok::kw_try) &&
Last->Tok.isNot(tok::kw_catch) && Last->Tok.isNot(tok::kw_for) &&
Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
// This gets rid of all ObjC @ keywords and methods.
Last->Tok.isNot(tok::at) && Last->Tok.isNot(tok::minus) &&
Last->Tok.isNot(tok::plus);
while (!Last->Children.empty())
Last = &Last->Children.back();
if (!Last->Tok.is(tok::l_brace))
return FitsOnALine;
Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
Line.First.isNot(tok::plus);
if (Line.Last->isNot(tok::l_brace) || !AllowedTokens)
return true;
// Second, check that the next line does not contain any braces - if it
// does, readability declines when putting it into a single line.
const FormatToken *Next = &(I + 1)->RootToken;
while (Next) {
AllowedTokens = AllowedTokens && !Next->Tok.is(tok::l_brace) &&
!Next->Tok.is(tok::r_brace);
Last->Children.push_back(*Next);
Last = &Last->Children[0];
Last->Children.clear();
Next = Next->Children.empty() ? NULL : &Next->Children.back();
}
const AnnotatedToken *Tok = &(I + 1)->First;
if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
return true;
do {
if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
return true;
Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
} while (Tok != NULL);
// Last, check that the third line contains a single closing brace.
Next = &(I + 2)->RootToken;
AllowedTokens = AllowedTokens && Next->Tok.is(tok::r_brace);
if (!Next->Children.empty() || !AllowedTokens)
return FitsOnALine;
Last->Children.push_back(*Next);
Tok = &(I + 2)->First;
if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
Tok->MustBreakBefore)
return true;
OwningPtr<TokenAnnotator> CombinedAnnotator(
new TokenAnnotator(Combined, Style, SourceMgr, Lex));
if (CombinedAnnotator->annotate() &&
fitsIntoLimit(CombinedAnnotator->getRootToken(), Limit)) {
// If the merged line fits, we use that instead and skip the next two
// lines.
AnnotatedLine.reset(CombinedAnnotator.take());
Line = Combined;
// If the merged line fits, we use that instead and skip the next two lines.
Line.Last->Children.push_back((I + 1)->First);
while (!Line.Last->Children.empty()) {
Line.Last->Children[0].Parent = Line.Last;
Line.Last = &Line.Last->Children[0];
}
join(Line, *(I + 1));
join(Line, *(I + 2));
I += 2;
}
return FitsOnALine;
return true;
}
const FormatToken *getLastInLine(const UnwrappedLine &TheLine) {
const FormatToken *Last = &TheLine.RootToken;
while (!Last->Children.empty())
Last = &Last->Children.back();
return Last;
void join(AnnotatedLine &A, const AnnotatedLine &B) {
A.Last->Children.push_back(B.First);
while (!A.Last->Children.empty()) {
A.Last->Children[0].Parent = A.Last;
A.Last = &A.Last->Children[0];
}
}
bool touchesRanges(const UnwrappedLine &TheLine) {
const FormatToken *First = &TheLine.RootToken;
const FormatToken *Last = getLastInLine(TheLine);
bool touchesRanges(const AnnotatedLine &TheLine) {
const FormatToken *First = &TheLine.First.FormatTok;
const FormatToken *Last = &TheLine.Last->FormatTok;
CharSourceRange LineRange = CharSourceRange::getTokenRange(
First->Tok.getLocation(),
Last->Tok.getLocation());
@ -1478,7 +1492,8 @@ private:
}
virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
UnwrappedLines.push_back(TheLine);
AnnotatedLines.push_back(
AnnotatedLine(TheLine.RootToken, TheLine.Level, TheLine.InPPDirective));
}
/// \brief Add a new line and the required indent before the first Token
@ -1526,7 +1541,7 @@ private:
SourceManager &SourceMgr;
tooling::Replacements Replaces;
std::vector<CharSourceRange> Ranges;
std::vector<UnwrappedLine> UnwrappedLines;
std::vector<AnnotatedLine> AnnotatedLines;
bool StructuralError;
};