forked from OSchip/llvm-project
Allow the preprocessor to cache the lexed tokens, so that we can do efficient lookahead and backtracking.
1) New public methods added: -EnableBacktrackAtThisPos -DisableBacktrack -Backtrack -isBacktrackEnabled 2) LookAhead() implementation is replaced with a more efficient one. 3) LookNext() is removed. llvm-svn: 54611
This commit is contained in:
parent
07bb087ac1
commit
b3dd1e0889
|
@ -71,6 +71,9 @@ class Preprocessor {
|
|||
bool DisableMacroExpansion : 1; // True if macro expansion is disabled.
|
||||
bool InMacroArgs : 1; // True if parsing fn macro invocation args.
|
||||
|
||||
/// CacheTokens - True when the lexed tokens are cached for backtracking.
|
||||
bool CacheTokens : 1;
|
||||
|
||||
/// Identifiers - This is mapping/lookup information for all identifiers in
|
||||
/// the program, including program keywords.
|
||||
IdentifierTable Identifiers;
|
||||
|
@ -139,10 +142,24 @@ class Preprocessor {
|
|||
unsigned NumCachedTokenLexers;
|
||||
TokenLexer *TokenLexerCache[TokenLexerCacheSize];
|
||||
|
||||
/// PeekedToken - Cache the token that was retrieved through LookNext().
|
||||
/// This is a valid token (its Location is valid) when LookNext() is
|
||||
/// called and gets invalid again when it is "consumed" by Lex().
|
||||
Token PeekedToken;
|
||||
// Cached tokens state.
|
||||
|
||||
typedef std::vector<Token> CachedTokensTy;
|
||||
|
||||
/// CachedTokens - Cached tokens are stored here when we do backtracking or
|
||||
/// lookahead. They are "lexed" by the CachingLex() method.
|
||||
CachedTokensTy CachedTokens;
|
||||
|
||||
/// CachedLexPos - The position of the cached token that CachingLex() should
|
||||
/// "lex" next. If it points beyond the CachedTokens vector, it means that
|
||||
/// a normal Lex() should be invoked.
|
||||
CachedTokensTy::size_type CachedLexPos;
|
||||
|
||||
/// CachedBacktrackPos - Gets set by the EnableBacktrackAtThisPos() method,
|
||||
/// to indicate the position where CachedLexPos should be set when the
|
||||
/// Backtrack() method is invoked.
|
||||
CachedTokensTy::size_type CachedBacktrackPos;
|
||||
|
||||
public:
|
||||
Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
|
||||
SourceManager &SM, HeaderSearch &Headers);
|
||||
|
@ -259,6 +276,44 @@ public:
|
|||
/// state of the top-of-stack lexer is known.
|
||||
void RemoveTopOfLexerStack();
|
||||
|
||||
/// EnableBacktrackAtThisPos - From the point that this method is called, and
|
||||
/// until DisableBacktrack() or Backtrack() is called, the Preprocessor keeps
|
||||
/// track of the lexed tokens so that a subsequent Backtrack() call will make
|
||||
/// the Preprocessor re-lex the same tokens.
|
||||
///
|
||||
/// EnableBacktrackAtThisPos should not be called again until DisableBacktrack
|
||||
/// or Backtrack is called.
|
||||
///
|
||||
/// NOTE: *DO NOT* forget to call either DisableBacktrack() or Backtrack() at
|
||||
/// some point after EnableBacktrackAtThisPos. If you don't, caching of tokens
|
||||
/// will continue indefinitely.
|
||||
///
|
||||
void EnableBacktrackAtThisPos() {
|
||||
// Only a single backtrack position (CachedBacktrackPos) is stored, so a
// second call before DisableBacktrack()/Backtrack() is a usage error.
assert(!CacheTokens && "Backtrack is already enabled!");
|
||||
// While this flag is set, CachingLex() records each newly lexed non-EOF
// token in CachedTokens.
CacheTokens = true;
|
||||
// Remember the current cache cursor; Backtrack() rewinds CachedLexPos to it.
CachedBacktrackPos = CachedLexPos;
|
||||
// Clear CurLexer/CurTokenLexer so that Lex() falls through to CachingLex().
EnterCachingLexMode();
|
||||
}
|
||||
|
||||
/// DisableBacktrack - Stop the caching of tokens that was enabled by
|
||||
/// EnableBacktrackAtThisPos().
|
||||
void DisableBacktrack() {
|
||||
assert(CacheTokens && "Backtrack is not enabled!");
|
||||
// Only the flag is cleared here. CachedTokens and CachedLexPos are left
// untouched; CachingLex() clears the cache the next time it has to lex a
// fresh token with CacheTokens unset.
CacheTokens = false;
|
||||
}
|
||||
|
||||
/// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
|
||||
/// EnableBacktrackAtThisPos() was previously called.
|
||||
void Backtrack() {
|
||||
assert(CacheTokens && "Backtrack is not enabled!");
|
||||
// Stop recording new tokens.
CacheTokens = false;
|
||||
// Rewind the cache cursor to the position saved by
// EnableBacktrackAtThisPos(), so CachingLex() hands out the cached tokens
// again from there.
// NOTE(review): this does not call EnterCachingLexMode(); it presumably
// relies on the preprocessor still being in caching-lex mode - confirm.
CachedLexPos = CachedBacktrackPos;
|
||||
}
|
||||
|
||||
/// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
|
||||
/// caching of tokens is on.
|
||||
// Reports only the CacheTokens flag. Whether the lexer is currently in
// caching-lex mode is a separate condition (see InCachingLexMode()).
bool isBacktrackEnabled() const { return CacheTokens; }
|
||||
|
||||
/// Lex - To lex a token from the preprocessor, just pull a token from the
|
||||
/// current lexer or macro object.
|
||||
void Lex(Token &Result) {
|
||||
|
@ -266,11 +321,8 @@ public:
|
|||
CurLexer->Lex(Result);
|
||||
else if (CurTokenLexer)
|
||||
CurTokenLexer->Lex(Result);
|
||||
else {
|
||||
// We have a peeked token that hasn't been consumed yet.
|
||||
Result = PeekedToken;
|
||||
ConsumedPeekedToken();
|
||||
}
|
||||
else
|
||||
CachingLex(Result);
|
||||
}
|
||||
|
||||
/// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
|
||||
|
@ -300,32 +352,12 @@ public:
|
|||
/// returned by Lex(), LookAhead(1) returns the token after it, etc. This
|
||||
/// returns normal tokens after phase 5. As such, it is equivalent to using
|
||||
/// 'Lex', not 'LexUnexpandedToken'.
|
||||
///
|
||||
/// NOTE: this is a relatively expensive method, so it should not be used in common
|
||||
/// code paths if possible!
|
||||
///
|
||||
Token LookAhead(unsigned N);
|
||||
|
||||
/// LookNext - Returns the next token that would be returned by Lex() without
|
||||
/// consuming it.
|
||||
const Token &LookNext() {
|
||||
if (PeekedToken.getLocation().isInvalid()) {
|
||||
// We don't have a peeked token that hasn't been consumed yet.
|
||||
// Peek it now.
|
||||
PeekToken();
|
||||
const Token &LookAhead(unsigned N) {
|
||||
if (CachedLexPos + N < CachedTokens.size())
|
||||
return CachedTokens[CachedLexPos+N];
|
||||
else
|
||||
return PeekAhead(N+1);
|
||||
}
|
||||
return PeekedToken;
|
||||
}
|
||||
|
||||
private:
|
||||
/// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
|
||||
/// CurTokenLexer into the IncludeMacroStack before setting them to null.
|
||||
void PeekToken();
|
||||
|
||||
/// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
|
||||
/// and have it "consumed".
|
||||
void ConsumedPeekedToken();
|
||||
public:
|
||||
|
||||
/// Diag - Forwarding function for diagnostics. This emits a diagnostic at
|
||||
/// the specified Token's location, translating the token's start
|
||||
|
@ -523,6 +555,17 @@ private:
|
|||
bool isAngled, const DirectoryLookup *FromDir,
|
||||
const DirectoryLookup *&CurDir);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Caching stuff.
|
||||
void CachingLex(Token &Result);
|
||||
/// InCachingLexMode - True when neither a Lexer nor a TokenLexer is current.
/// EnterCachingLexMode() establishes this state by nulling both pointers.
bool InCachingLexMode() const { return CurLexer == 0 && CurTokenLexer == 0; }
|
||||
void EnterCachingLexMode();
|
||||
/// ExitCachingLexMode - Leave caching-lex mode if it is active; otherwise do
/// nothing, so callers may invoke this unconditionally.
void ExitCachingLexMode() {
|
||||
if (InCachingLexMode())
|
||||
// Counterpart of the IncludeMacroStack push done by EnterCachingLexMode().
// NOTE(review): presumably this restores CurLexer/CurTokenLexer from that
// entry - RemoveTopOfLexerStack() is not visible here, confirm.
RemoveTopOfLexerStack();
|
||||
}
|
||||
const Token &PeekAhead(unsigned N);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
/// Handle*Directive - implement the various preprocessor directives. These
|
||||
/// should side-effect the current preprocessor object so that the next call
|
||||
|
|
|
@ -198,10 +198,7 @@ private:
|
|||
/// Note that this differs from the Preprocessor's LookAhead method, because
|
||||
/// the Parser always has one token lexed that the preprocessor doesn't.
|
||||
///
|
||||
/// NOTE: this is a relatively expensive method, so it should not be used in common
|
||||
/// code paths if possible!
|
||||
///
|
||||
Token GetLookAheadToken(unsigned N) {
|
||||
const Token &GetLookAheadToken(unsigned N) {
|
||||
if (N == 0 || Tok.is(tok::eof)) return Tok;
|
||||
return PP.LookAhead(N-1);
|
||||
}
|
||||
|
@ -209,7 +206,7 @@ private:
|
|||
/// NextToken - This peeks ahead one token and returns it without
|
||||
/// consuming it.
|
||||
const Token &NextToken() {
|
||||
return PP.LookNext();
|
||||
return PP.LookAhead(0);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements pieces of the Preprocessor interface that manage the
|
||||
// caching of lexed tokens.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
using namespace clang;
|
||||
|
||||
/// CachingLex - Produce the next token while in caching-lex mode.
///
/// If a cached token is pending at CachedLexPos it is copied into Result and
/// the cursor advances. Otherwise caching-lex mode is left, a fresh token is
/// lexed through Lex(), and then either the cache is reset (CacheTokens
/// unset) or the token is appended to the cache and caching-lex mode is
/// re-entered (CacheTokens set).
void Preprocessor::CachingLex(Token &Result) {
|
||||
// Replay path: a previously cached token is still ahead of the cursor.
if (CachedLexPos < CachedTokens.size()) {
|
||||
Result = CachedTokens[CachedLexPos++];
|
||||
return;
|
||||
}
|
||||
|
||||
// The cache is exhausted. Leave caching-lex mode first so that the Lex()
// call below does not dispatch straight back into CachingLex().
ExitCachingLexMode();
|
||||
Lex(Result);
|
||||
|
||||
if (!CacheTokens) {
|
||||
// All cached tokens were consumed.
|
||||
// Nobody is recording, so drop the cache and reset the cursor. Caching-lex
// mode is not re-entered on this path.
CachedTokens.clear();
|
||||
CachedLexPos = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// We should cache the lexed token.
|
||||
|
||||
EnterCachingLexMode();
|
||||
// EOF is returned to the caller but is not recorded in the cache.
if (Result.isNot(tok::eof)) {
|
||||
CachedTokens.push_back(Result);
|
||||
// Keep the cursor one past the newest entry: the token was just consumed.
++CachedLexPos;
|
||||
}
|
||||
}
|
||||
|
||||
/// EnterCachingLexMode - Save the current lexer state on IncludeMacroStack and
/// null CurLexer/CurTokenLexer, which is the state InCachingLexMode() tests
/// for. Does nothing if that state already holds.
void Preprocessor::EnterCachingLexMode() {
|
||||
// Already in caching-lex mode: avoid pushing a second stack entry.
if (InCachingLexMode())
|
||||
return;
|
||||
|
||||
// Save the active lexer, directory lookup and token lexer so that
// ExitCachingLexMode() can undo this step.
IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
|
||||
CurTokenLexer));
|
||||
CurLexer = 0;
|
||||
CurTokenLexer = 0;
|
||||
}
|
||||
|
||||
|
||||
/// PeekAhead - Lex enough additional tokens that CachedTokens holds N entries
/// from CachedLexPos onward, without advancing CachedLexPos, and return the
/// last one lexed. LookAhead(K) calls this with N = K+1 when the requested
/// token is not cached yet.
///
/// The returned reference points into CachedTokens (a std::vector), so it is
/// only good until the vector is next grown or cleared.
const Token &Preprocessor::PeekAhead(unsigned N) {
|
||||
// Callers must only come here when at least one new token has to be lexed.
assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
|
||||
// Leave caching-lex mode so the Lex() calls below reach the underlying lexer
// instead of dispatching to CachingLex().
ExitCachingLexMode();
|
||||
// C counts the tokens still missing from the cache.
// NOTE(review): the loop does not stop at tok::eof (the old LookAhead did);
// verify that lexing past EOF is harmless here.
for (unsigned C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
|
||||
// Append a slot, then lex directly into it.
CachedTokens.push_back(Token());
|
||||
Lex(CachedTokens.back());
|
||||
}
|
||||
EnterCachingLexMode();
|
||||
// After the loop the cache ends exactly at index CachedLexPos+N-1.
return CachedTokens.back();
|
||||
}
|
|
@ -60,94 +60,6 @@ Lexer *Preprocessor::getCurrentFileLexer() const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// LookAhead - This peeks ahead N tokens and returns that token without
|
||||
/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
|
||||
/// the token after Tok, etc.
|
||||
///
|
||||
/// NOTE: this is a relatively expensive method, so it should not be used in common
|
||||
/// code paths if possible!
|
||||
///
|
||||
Token Preprocessor::LookAhead(unsigned N) {
|
||||
// FIXME: Optimize the case where multiple lookahead calls are used back to
|
||||
// back. Consider if the parser contained (dynamically):
|
||||
// Lookahead(1); Lookahead(1); Lookahead(1)
|
||||
// This would return the same token 3 times, but would end up making lots of
|
||||
// token stream lexers to do it. To handle this common case, see if the top
|
||||
// of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
|
||||
// so, see if it has 'N' tokens available in it. If so, just return the
|
||||
// token.
|
||||
|
||||
// FIXME: Optimize the case when the parser does multiple nearby lookahead
|
||||
// calls. For example, consider:
|
||||
// Lookahead(0); Lookahead(1); Lookahead(2);
|
||||
// The previous optimization won't apply, and there won't be any space left in
|
||||
// the array that was previously new'd. To handle this, always round up the
|
||||
// size we new to a multiple of 16 tokens. If the previous buffer has space
|
||||
// left, we can just grow it. This means we only have to do the new 1/16th as
|
||||
// often.
|
||||
|
||||
// Optimized LookAhead(0) case.
|
||||
if (N == 0)
|
||||
return LookNext();
|
||||
|
||||
Token *LookaheadTokens = new Token[N+1];
|
||||
|
||||
// Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
|
||||
// to return.
|
||||
Token Tok;
|
||||
unsigned NumTokens = 0;
|
||||
for (; N != ~0U; --N, ++NumTokens) {
|
||||
Lex(Tok);
|
||||
LookaheadTokens[NumTokens] = Tok;
|
||||
|
||||
// If we got to EOF, don't lex past it. This will cause LookAhead to return
|
||||
// the EOF token.
|
||||
if (Tok.is(tok::eof))
|
||||
break;
|
||||
}
|
||||
|
||||
// Okay, at this point, we have the token we want to return in Tok. However,
|
||||
// we read it and a bunch of other stuff (in LookaheadTokens) that we must
|
||||
// allow subsequent calls to 'Lex' to return. To do this, we push a new token
|
||||
// lexer onto the lexer stack with the tokens we read here. This passes
|
||||
// ownership of LookaheadTokens to EnterTokenStream.
|
||||
//
|
||||
// Note that we disable macro expansion of the tokens from this buffer, since
|
||||
// any macros have already been expanded, and the internal preprocessor state
|
||||
// may already read past new macros. Consider something like LookAhead(1) on
|
||||
// X
|
||||
// #define X 14
|
||||
// Y
|
||||
// The lookahead call should return 'Y', and the next Lex call should return
|
||||
// 'X' even though X -> 14 has already been entered as a macro.
|
||||
//
|
||||
EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
|
||||
true /*OwnsTokens*/);
|
||||
return Tok;
|
||||
}
|
||||
|
||||
/// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
|
||||
/// CurTokenLexer into the IncludeMacroStack before setting them to null.
|
||||
void Preprocessor::PeekToken() {
|
||||
Lex(PeekedToken);
|
||||
// Cache the current Lexer, TokenLexer and set them both to null.
|
||||
// When Lex() is called, PeekedToken will be "consumed".
|
||||
IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
|
||||
CurTokenLexer));
|
||||
CurLexer = 0;
|
||||
CurTokenLexer = 0;
|
||||
}
|
||||
|
||||
/// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
|
||||
/// and have it "consumed".
|
||||
void Preprocessor::ConsumedPeekedToken() {
|
||||
assert(PeekedToken.getLocation().isValid() && "Confused Peeking?");
|
||||
// Restore CurLexer, TokenLexer.
|
||||
RemoveTopOfLexerStack();
|
||||
// Make PeekedToken invalid.
|
||||
PeekedToken.startToken();
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Methods for Entering and Callbacks for leaving various contexts
|
||||
|
|
|
@ -68,6 +68,9 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
|
|||
InMacroArgs = false;
|
||||
NumCachedTokenLexers = 0;
|
||||
|
||||
CacheTokens = false;
|
||||
CachedLexPos = 0;
|
||||
|
||||
// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
|
||||
// This gets unpoisoned where it is allowed.
|
||||
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
|
||||
|
@ -579,4 +582,3 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
|
|||
if (II.isExtensionToken() && Features.C99)
|
||||
Diag(Identifier, diag::ext_token_used);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue