PTH:
- Embed 'eom' tokens in the PTH file.
- Use the embedded 'eom' tokens instead of lazily generating them in the
  PTHLexer. This means the PTHLexer can always advance to the next token
  after reading a token (instead of buffering tokens using a copy).
- Moved the logic of 'ReadToken' into Lex; GetToken and ReadToken no
  longer exist.
- These changes result in a 3.3% speedup (-Eonly) on Cocoa.h.
- The code is a little gross. Many cleanups are possible and should be done.

llvm-svn: 61360
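For context, the fixed-width on-disk token record implied by DISK_TOKEN_SIZE
(2+3*4 = 14 bytes) and the Read8/Read32 sequence in the new PTHLexer::Lex
below can be pictured roughly as the struct that follows. This is an
illustrative sketch, not code from the patch; the field names are invented,
and the file stores the five fields back to back, so a real reader decodes
them field by field (struct padding would make sizeof() larger than 14).

    #include <cstdint>

    // Hypothetical layout of one token record in the PTH token stream.
    struct PTHDiskToken {
      uint8_t  Kind;        // tok::TokenKind, read via Read8
      uint8_t  Flags;       // Token::TokenFlags, read via Read8
      uint32_t IdentIndex;  // resolved by PTHManager::ReadIdentifierInfo
      uint32_t FileOffset;  // becomes the token's SourceLocation offset
      uint32_t Length;      // token length in bytes
    };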
parent 033f56d533
commit 1b18ad240c
@@ -159,23 +159,43 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
  typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;
  PPCondTable PPCond;
  std::vector<unsigned> PPStartCond;
  bool ParsingPreprocessorDirective = false;

  Token Tok;

  do {
    L.LexFromRawLexer(Tok);

    if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) &&
        ParsingPreprocessorDirective) {
      // Insert an eom token into the token cache. It has the same
      // position as the next token that is not on the same line as the
      // preprocessor directive. Observe that we continue processing
      // 'Tok' when we exit this branch.
      Token Tmp = Tok;
      Tmp.setKind(tok::eom);
      Tmp.clearFlag(Token::StartOfLine);
      Tmp.setIdentifierInfo(0);
      EmitToken(Out, Tmp, SMgr, idcount, IM);
      ParsingPreprocessorDirective = false;
    }

    if (Tok.is(tok::identifier)) {
      Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
      continue;
    }
    else if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
    if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
      // Special processing for #include. Store the '#' token and lex
      // the next token.
      assert(!ParsingPreprocessorDirective);
      Offset HashOff = (Offset) Out.tell();
      EmitToken(Out, Tok, SMgr, idcount, IM);

      // Get the next token.
      L.LexFromRawLexer(Tok);

      assert(!Tok.isAtStartOfLine());

      // Did we see 'include'/'import'/'include_next'?
      if (!Tok.is(tok::identifier))
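To make the 'eom' embedding concrete, here is a hypothetical trace of what the
generator now writes for a two-line input. The exact token kinds are
illustrative; the filename token, for instance, would be whatever kind
LexIncludeFilename produces:

    // Input:
    //   #include <foo.h>
    //   int x;
    //
    // Emitted PTH token stream (sketch):
    //   hash, identifier("include"), filename("<foo.h>"),
    //   eom,   <- synthesized here: 'int' starts a new line while
    //             ParsingPreprocessorDirective is still true; the eom
    //             copies 'int's position but clears StartOfLine
    //   "int", identifier("x"), semi, ..., eof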
@@ -185,27 +205,37 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
      Tok.setIdentifierInfo(II);
      tok::PPKeywordKind K = II->getPPKeywordID();

      if (K == tok::pp_include || K == tok::pp_import ||
          K == tok::pp_include_next) {

      assert(K != tok::pp_not_keyword);
      ParsingPreprocessorDirective = true;

      switch (K) {
      default:
        break;
      case tok::pp_include:
      case tok::pp_import:
      case tok::pp_include_next: {
        // Save the 'include' token.
        EmitToken(Out, Tok, SMgr, idcount, IM);

        // Lex the next token as an include string.
        L.setParsingPreprocessorDirective(true);
        L.LexIncludeFilename(Tok);
        L.setParsingPreprocessorDirective(false);

        assert(!Tok.isAtStartOfLine());
        if (Tok.is(tok::identifier))
          Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));

        break;
      }
      else if (K == tok::pp_if || K == tok::pp_ifdef || K == tok::pp_ifndef) {
      case tok::pp_if:
      case tok::pp_ifdef:
      case tok::pp_ifndef: {
        // Add an entry for '#if' and friends. We initially set the target
        // index to 0. This will get backpatched when we hit #endif.
        PPStartCond.push_back(PPCond.size());
        PPCond.push_back(std::make_pair(HashOff, 0U));
        break;
      }
      else if (K == tok::pp_endif) {
      case tok::pp_endif: {
        // Add an entry for '#endif'. We set the target table index to itself.
        // This will later be set to zero when emitting to the PTH file. We
        // use 0 for uninitialized indices because that is easier to debug.
@@ -218,9 +248,11 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
        PPStartCond.pop_back();
        // Add the new entry to PPCond.
        PPCond.push_back(std::make_pair(HashOff, index));
        break;
      }
      else if (K == tok::pp_elif || K == tok::pp_else) {
        // Add an entry for '#elif' or '#else'.
      case tok::pp_elif:
      case tok::pp_else: {
        // Add an entry for #elif or #else.
        // This serves as both a closing and opening of a conditional block.
        // This means that its entry will get backpatched later.
        unsigned index = PPCond.size();
@@ -233,6 +265,8 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
        // Now add '#elif' as a new block opening.
        PPCond.push_back(std::make_pair(HashOff, 0U));
        PPStartCond.push_back(index);
        break;
      }
      }
    }
  }
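Pulling the switch cases together, here is a hypothetical walk-through of how
the PPCond side table is built and backpatched for one conditional chain. It
is consistent with the code above but reconstructed from these partial hunks;
h1..h4 stand for the HashOff file offsets of each '#', entries are
(HashOff, targetIndex) pairs, and indices are positions in PPCond:

    // #if A    -> open entry 0:    PPCond = {(h1,0)}            PPStartCond = {0}
    // #elif B  -> backpatch 0->1, open entry 1:
    //             PPCond = {(h1,1),(h2,0)}                      PPStartCond = {1}
    // #else    -> backpatch 1->2, open entry 2:
    //             PPCond = {(h1,1),(h2,2),(h3,0)}               PPStartCond = {2}
    // #endif   -> entry 3 targets itself, backpatch 2->3:
    //             PPCond = {(h1,1),(h2,2),(h3,3),(h4,3)}        PPStartCond = {}
    //
    // Each open block thus points at the next '#' in the chain, which is what
    // lets SkipBlock hop from directive to directive without lexing the
    // skipped tokens.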
@@ -51,9 +51,7 @@ class PTHLexer : public PreprocessorLexer {
  /// PTHMgr - The PTHManager object that created this PTHLexer.
  PTHManager& PTHMgr;

  Token LastFetched;
  Token EofToken;
  bool NeedsFetching;

public:
@@ -95,20 +93,7 @@ public:
  /// SkipBlock - Used by Preprocessor to skip the current conditional block.
  bool SkipBlock();

private:
  /// AtLastToken - Returns true if the PTHLexer is at the last token.
  bool AtLastToken() {
    Token T = GetToken();
    return T.is(tok::eof) ? EofToken = T, true : false;
  }

  /// GetToken - Returns the next token. This method does not advance the
  /// PTHLexer to the next token.
  Token GetToken();

  /// AdvanceToken - Advances the PTHLexer to the next token.
  void AdvanceToken() { NeedsFetching = true; }

private:
  bool LexEndOfFile(Token &Result);
};
@@ -339,6 +339,13 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {
      // If the #if block wasn't entered then enter the #else block now.
      if (!CondInfo.FoundNonSkip) {
        CondInfo.FoundNonSkip = true;

        // Consume the eom token.
        CurPTHLexer->ParsingPreprocessorDirective = true;
        LexUnexpandedToken(Tok);
        assert(Tok.is(tok::eom));
        CurPTHLexer->ParsingPreprocessorDirective = false;

        break;
      }
@@ -28,129 +28,6 @@ using namespace clang;

#define DISK_TOKEN_SIZE (2+3*4)

PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   const char* ppcond, PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
  // Make sure the EofToken is completely clean.
  EofToken.startToken();
}

Token PTHLexer::GetToken() {
  // Read the next token, or if we haven't advanced yet, get the last
  // token read.
  if (NeedsFetching) {
    NeedsFetching = false;
    ReadToken(LastFetched);
  }

  Token Tok = LastFetched;

  // If we are in raw mode, zero out identifier pointers. This is
  // needed for 'pragma poison'. Note that this requires that the Preprocessor
  // can go back to the original source when it calls getSpelling().
  if (LexingRawMode && Tok.is(tok::identifier))
    Tok.setIdentifierInfo(0);

  return Tok;
}

void PTHLexer::Lex(Token& Tok) {
LexNextToken:
  Tok = GetToken();

  if (AtLastToken()) {
    Preprocessor *PPCache = PP;

    if (LexEndOfFile(Tok))
      return;

    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    return PPCache->Lex(Tok);
  }

  // Don't advance to the next token yet. Check if we are at the
  // start of a new line and we're processing a directive. If so, we
  // consume this token twice, once as a tok::eom.
  if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
    ParsingPreprocessorDirective = false;
    Tok.setKind(tok::eom);
    MIOpt.ReadToken();
    return;
  }

  // Advance to the next token.
  AdvanceToken();

  if (Tok.is(tok::hash)) {
    if (Tok.isAtStartOfLine()) {
      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
      if (!LexingRawMode) {
        PP->HandleDirective(Tok);

        if (PP->isCurrentLexer(this))
          goto LexNextToken;

        return PP->Lex(Tok);
      }
    }
  }

  MIOpt.ReadToken();

  if (Tok.is(tok::identifier)) {
    if (LexingRawMode) return;
    return PP->HandleIdentifier(Tok);
  }
}

bool PTHLexer::LexEndOfFile(Token &Tok) {

  if (ParsingPreprocessorDirective) {
    ParsingPreprocessorDirective = false;
    Tok.setKind(tok::eom);
    MIOpt.ReadToken();
    return true; // Have a token.
  }

  if (LexingRawMode) {
    MIOpt.ReadToken();
    return true; // Have an eof token.
  }

  // FIXME: Issue diagnostics similar to Lexer.
  return PP->HandleEndOfFile(Tok, false);
}

void PTHLexer::setEOF(Token& Tok) {
  assert(!EofToken.is(tok::eof));
  Tok = EofToken;
}

void PTHLexer::DiscardToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");

  // Skip tokens by only peeking at their token kind and the flags.
  // We don't need to actually reconstruct full tokens from the token buffer.
  // This saves some copies and it also reduces IdentifierInfo* lookup.
  const char* p = CurPtr;
  while (1) {
    // Read the token kind. Are we at the end of the file?
    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    if (x == tok::eof) break;

    // Read the token flags. Are we at the start of the next line?
    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    if (y & Token::StartOfLine) break;

    // Skip to the next token.
    p += DISK_TOKEN_SIZE;
  }

  CurPtr = p;
}

//===----------------------------------------------------------------------===//
// Utility methods for reading from the mmap'ed PTH file.
//===----------------------------------------------------------------------===//
@@ -167,6 +44,150 @@ static inline uint32_t Read32(const char*& data) {
  return V;
}

//===----------------------------------------------------------------------===//
// PTHLexer methods.
//===----------------------------------------------------------------------===//

PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   const char* ppcond, PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {}

void PTHLexer::Lex(Token& Tok) {
LexNextToken:

  // Read the token.
  // FIXME: Setting the flags directly should obviate this step.
  Tok.startToken();

  // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
  // store back into the instance variable.
  const char *CurPtrShadow = CurPtr;

  // Read the type of the token.
  Tok.setKind((tok::TokenKind) Read8(CurPtrShadow));

  // Set flags. This is gross, since we are really setting multiple flags.
  Tok.setFlag((Token::TokenFlags) Read8(CurPtrShadow));

  // Set the IdentifierInfo* (if any).
  Tok.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));

  // Set the SourceLocation. Since all tokens are constructed using a
  // raw lexer, they will all be offset from the same FileID.
  Tok.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));

  // Finally, read and set the length of the token.
  Tok.setLength(Read32(CurPtrShadow));

  CurPtr = CurPtrShadow;

  if (Tok.is(tok::eof)) {
    // Save the end-of-file token.
    EofToken = Tok;

    Preprocessor *PPCache = PP;

    if (LexEndOfFile(Tok))
      return;

    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    return PPCache->Lex(Tok);
  }

  MIOpt.ReadToken();

  if (Tok.is(tok::eom)) {
    ParsingPreprocessorDirective = false;
    return;
  }

#if 0
  SourceManager& SM = PP->getSourceManager();
  SourceLocation L = Tok.getLocation();

  static const char* last = 0;
  const char* next = SM.getContentCacheForLoc(L)->Entry->getName();
  if (next != last) {
    last = next;
    llvm::cerr << next << '\n';
  }

  llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " <<
    SM.getLogicalColumnNumber(L) << '\n';
#endif

  if (Tok.is(tok::hash)) {
    if (Tok.isAtStartOfLine()) {
      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
      if (!LexingRawMode) {
        PP->HandleDirective(Tok);

        if (PP->isCurrentLexer(this))
          goto LexNextToken;

        return PP->Lex(Tok);
      }
    }
  }

  if (Tok.is(tok::identifier)) {
    if (LexingRawMode) {
      Tok.setIdentifierInfo(0);
      return;
    }

    return PP->HandleIdentifier(Tok);
  }

  assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective);
}

// FIXME: This method can just be inlined into Lex().
bool PTHLexer::LexEndOfFile(Token &Tok) {
  assert(!ParsingPreprocessorDirective);
  assert(!LexingRawMode);

  // FIXME: Issue diagnostics similar to Lexer.
  return PP->HandleEndOfFile(Tok, false);
}

// FIXME: We can just grab the last token instead of storing a copy
// into EofToken.
void PTHLexer::setEOF(Token& Tok) {
  assert(!EofToken.is(tok::eof));
  Tok = EofToken;
}

void PTHLexer::DiscardToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");

  // We assume that if the preprocessor wishes to discard to the end of
  // the line that it also means to end the current preprocessor directive.
  ParsingPreprocessorDirective = false;

  // Skip tokens by only peeking at their token kind and the flags.
  // We don't need to actually reconstruct full tokens from the token buffer.
  // This saves some copies and it also reduces IdentifierInfo* lookup.
  const char* p = CurPtr;
  while (1) {
    // Read the token kind. Are we at the end of the file?
    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    if (x == tok::eof) break;

    // Read the token flags. Are we at the start of the next line?
    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    if (y & Token::StartOfLine) break;

    // Skip to the next token.
    p += DISK_TOKEN_SIZE;
  }

  CurPtr = p;
}

/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool PTHLexer::SkipBlock() {
  assert(CurPPCondPtr && "No cached PP conditional information.");
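Only the tail of Read32 is visible at the top of this hunk. A minimal sketch
of the two helpers the new Lex() leans on might look like the following; the
little-endian byte order is an assumption here, not something this diff shows:

    static inline uint8_t Read8(const char*& data) {
      return (uint8_t) *data++;
    }

    static inline uint32_t Read32(const char*& data) {
      // Reassemble four bytes, low byte first (assumed byte order).
      uint32_t V = ((uint32_t)(uint8_t) data[0])
                 | (((uint32_t)(uint8_t) data[1]) << 8)
                 | (((uint32_t)(uint8_t) data[2]) << 16)
                 | (((uint32_t)(uint8_t) data[3]) << 24);
      data += 4;
      return V;
    }

Taking the pointer by reference lets each call advance the cursor as a side
effect, which is what allows Lex() to decode one record with a straight-line
sequence of reads.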
@@ -225,7 +246,6 @@ bool PTHLexer::SkipBlock() {
  // By construction NextIdx will be zero if this is a #endif. This is useful
  // to know to obviate lexing another token.
  bool isEndif = NextIdx == 0;
  NeedsFetching = true;

  // This case can occur when we see something like this:
  //
@@ -240,7 +260,7 @@ bool PTHLexer::SkipBlock() {
    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
    // Did we reach a #endif? If so, go ahead and consume that token as well.
    if (isEndif)
      CurPtr += DISK_TOKEN_SIZE;
      CurPtr += DISK_TOKEN_SIZE*2;
    else
      LastHashTokPtr = HashEntryI;
@@ -253,20 +273,13 @@ bool PTHLexer::SkipBlock() {
    // Update the location of the last observed '#'. This is useful if we
    // are skipping multiple blocks.
    LastHashTokPtr = CurPtr;

#ifndef DEBUG
    // In a debug build we should verify that the token is really a '#' that
    // appears at the start of the line.
    Token Tok;
    ReadToken(Tok);
    assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
#else
    // In a full release build we can just skip the token entirely.
    CurPtr += DISK_TOKEN_SIZE;
#endif

    // Skip the '#' token.
    assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
    CurPtr += DISK_TOKEN_SIZE;

    // Did we reach a #endif? If so, go ahead and consume that token as well.
    if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
    if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }

    return isEndif;
  }
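The change from DISK_TOKEN_SIZE to DISK_TOKEN_SIZE*2 in the two isEndif paths
above reads as a direct consequence of the fixed-width record format: skipping
is plain pointer arithmetic, and with this commit a #endif is followed by an
embedded eom record that must be stepped over as well. A sketch of the idea,
with the helper name being illustrative rather than part of the patch:

    // Advance past n fixed-width token records in the mmap'ed buffer.
    static const char* SkipDiskTokens(const char* p, unsigned n) {
      return p + n * DISK_TOKEN_SIZE;
    }

    // After consuming the '#', a #endif needs two more records skipped:
    // the 'endif' identifier and the newly embedded eom.
    // CurPtr = SkipDiskTokens(CurPtr, 2);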
@@ -286,38 +299,6 @@ SourceLocation PTHLexer::getSourceLocation() {
  return SourceLocation::getFileLoc(FileID, offset);
}

//===----------------------------------------------------------------------===//
// Token reconstruction from the PTH file.
//===----------------------------------------------------------------------===//

void PTHLexer::ReadToken(Token& T) {
  // Clear the token.
  // FIXME: Setting the flags directly should obviate this step.
  T.startToken();

  // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
  // store back into the instance variable.
  const char *CurPtrShadow = CurPtr;

  // Read the type of the token.
  T.setKind((tok::TokenKind) Read8(CurPtrShadow));

  // Set flags. This is gross, since we are really setting multiple flags.
  T.setFlag((Token::TokenFlags) Read8(CurPtrShadow));

  // Set the IdentifierInfo* (if any).
  T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));

  // Set the SourceLocation. Since all tokens are constructed using a
  // raw lexer, they will all be offset from the same FileID.
  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));

  // Finally, read and set the length of the token.
  T.setLength(Read32(CurPtrShadow));

  CurPtr = CurPtrShadow;
}

//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//