Comment parsing: couple TextTokenRetokenizer and comment parser together to

remove one of the two variable-length lookahead buffers.  Now retokenizer will
ask for more tokens when it needs them.

llvm-svn: 160680
This commit is contained in:
Dmitri Gribenko 2012-07-24 17:52:18 +00:00
parent 1bfd9dadda
commit 0a36302ae0
2 changed files with 44 additions and 50 deletions

View File

@ -27,6 +27,8 @@ namespace comments {
/// Doxygen comment parser.
class Parser {
friend class TextTokenRetokenizer;
Lexer &L;
Sema &S;

View File

@ -19,8 +19,8 @@ namespace comments {
/// Re-lexes a sequence of tok::text tokens.
class TextTokenRetokenizer {
llvm::BumpPtrAllocator &Allocator;
static const unsigned MaxTokens = 16;
SmallVector<Token, MaxTokens> Toks;
Parser &P;
SmallVector<Token, 16> Toks;
struct Position {
unsigned CurToken;
@ -39,7 +39,7 @@ class TextTokenRetokenizer {
/// Sets up the buffer pointers to point to current token.
void setupBuffer() {
assert(Pos.CurToken < Toks.size());
assert(!isEnd());
const Token &Tok = Toks[Pos.CurToken];
Pos.BufferStart = Tok.getText().begin();
@ -65,11 +65,27 @@ class TextTokenRetokenizer {
Pos.BufferPtr++;
if (Pos.BufferPtr == Pos.BufferEnd) {
Pos.CurToken++;
if (Pos.CurToken < Toks.size())
if (isEnd() && addToken()) {
assert(!isEnd());
setupBuffer();
}
}
}
/// Add a token.
/// Returns true on success, false if there are no interesting tokens to
/// fetch from lexer.
bool addToken() {
if (P.Tok.isNot(tok::text))
return false;
Toks.push_back(P.Tok);
P.consumeToken();
if (Toks.size() == 1)
setupBuffer();
return true;
}
static bool isWhitespace(char C) {
return C == ' ' || C == '\n' || C == '\r' ||
C == '\t' || C == '\f' || C == '\v';
@ -100,22 +116,10 @@ class TextTokenRetokenizer {
}
public:
TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator):
Allocator(Allocator) {
TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
Allocator(Allocator), P(P) {
Pos.CurToken = 0;
}
/// Add a token.
/// Returns true on success, false if it seems like we have enough tokens.
bool addToken(const Token &Tok) {
assert(Tok.is(tok::text));
if (Toks.size() >= MaxTokens)
return false;
Toks.push_back(Tok);
if (Toks.size() == 1)
setupBuffer();
return true;
addToken();
}
/// Extract a word -- sequence of non-whitespace characters.
@ -199,23 +203,27 @@ public:
return true;
}
/// Return a text token. Useful to take tokens back.
bool lexText(Token &Tok) {
/// Put back tokens that we didn't consume.
void putBackLeftoverTokens() {
if (isEnd())
return false;
return;
if (Pos.BufferPtr != Pos.BufferStart)
formTokenWithChars(Tok, getSourceLocation(),
bool HavePartialTok = false;
Token PartialTok;
if (Pos.BufferPtr != Pos.BufferStart) {
formTokenWithChars(PartialTok, getSourceLocation(),
Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
StringRef(Pos.BufferPtr,
Pos.BufferEnd - Pos.BufferPtr));
else
Tok = Toks[Pos.CurToken];
HavePartialTok = true;
Pos.CurToken++;
}
Pos.CurToken++;
if (Pos.CurToken < Toks.size())
setupBuffer();
return true;
P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
Pos.CurToken = Toks.size();
if (HavePartialTok)
P.putBack(PartialTok);
}
};
@ -296,24 +304,14 @@ BlockCommandComment *Parser::parseBlockCommand() {
if (IsParam || NumArgs > 0) {
// In order to parse command arguments we need to retokenize a few
// following text tokens.
TextTokenRetokenizer Retokenizer(Allocator);
while (Tok.is(tok::text)) {
if (Retokenizer.addToken(Tok))
consumeToken();
}
TextTokenRetokenizer Retokenizer(Allocator, *this);
if (IsParam)
PC = parseParamCommandArgs(PC, Retokenizer);
else
BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
// Put back tokens we didn't use.
SmallVector<Token, 16> TextToks;
Token Text;
while (Retokenizer.lexText(Text)) {
TextToks.push_back(Text);
}
putBack(TextToks);
Retokenizer.putBackLeftoverTokens();
}
BlockContentComment *Block = parseParagraphOrBlockCommand();
@ -331,11 +329,7 @@ InlineCommandComment *Parser::parseInlineCommand() {
const Token CommandTok = Tok;
consumeToken();
TextTokenRetokenizer Retokenizer(Allocator);
while (Tok.is(tok::text)) {
if (Retokenizer.addToken(Tok))
consumeToken();
}
TextTokenRetokenizer Retokenizer(Allocator, *this);
Token ArgTok;
bool ArgTokValid = Retokenizer.lexWord(ArgTok);
@ -354,9 +348,7 @@ InlineCommandComment *Parser::parseInlineCommand() {
CommandTok.getCommandName());
}
Token Text;
while (Retokenizer.lexText(Text))
putBack(Text);
Retokenizer.putBackLeftoverTokens();
return IC;
}