PTH:
- Embed 'eom' tokens in the PTH file.
- Use the embedded 'eom' tokens instead of lazily generating them in the
  PTHLexer. This means the PTHLexer can always advance to the next token
  after reading a token (instead of buffering tokens using a copy).
- Moved the logic of 'ReadToken' into Lex; GetToken and ReadToken no
  longer exist.
- These changes result in a 3.3% speedup (-Eonly) on Cocoa.h.
- The code is a little gross. Many cleanups are possible and should be done.

llvm-svn: 61360
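For context, the fixed-width on-disk token record implied by DISK_TOKEN_SIZE
(2+3*4 = 14 bytes) and the Read8/Read32 sequence in the new PTHLexer::Lex
below can be pictured roughly as the struct that follows. This is an
illustrative sketch, not code from the patch; the field names are invented,
and the file stores the five fields back to back, so a real reader decodes
them field by field (struct padding would make sizeof() larger than 14).

    #include <cstdint>

    // Hypothetical layout of one token record in the PTH token stream.
    struct PTHDiskToken {
      uint8_t  Kind;        // tok::TokenKind, read via Read8
      uint8_t  Flags;       // Token::TokenFlags, read via Read8
      uint32_t IdentIndex;  // resolved by PTHManager::ReadIdentifierInfo
      uint32_t FileOffset;  // becomes the token's SourceLocation offset
      uint32_t Length;      // token length in bytes
    };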
parent 033f56d533
commit 1b18ad240c
@@ -159,23 +159,43 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
  typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;
  PPCondTable PPCond;
  std::vector<unsigned> PPStartCond;
  bool ParsingPreprocessorDirective = false;

  Token Tok;

  do {
    L.LexFromRawLexer(Tok);

    if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) &&
        ParsingPreprocessorDirective) {
      // Insert an eom token into the token cache. It has the same
      // position as the next token that is not on the same line as the
      // preprocessor directive. Observe that we continue processing
      // 'Tok' when we exit this branch.
      Token Tmp = Tok;
      Tmp.setKind(tok::eom);
      Tmp.clearFlag(Token::StartOfLine);
      Tmp.setIdentifierInfo(0);
      EmitToken(Out, Tmp, SMgr, idcount, IM);
      ParsingPreprocessorDirective = false;
    }

    if (Tok.is(tok::identifier)) {
      Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
      continue;
    }
    else if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
    if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
      // Special processing for #include. Store the '#' token and lex
      // the next token.
      assert(!ParsingPreprocessorDirective);
      Offset HashOff = (Offset) Out.tell();
      EmitToken(Out, Tok, SMgr, idcount, IM);

      // Get the next token.
      L.LexFromRawLexer(Tok);

      assert(!Tok.isAtStartOfLine());

      // Did we see 'include'/'import'/'include_next'?
      if (!Tok.is(tok::identifier))
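To make the 'eom' embedding concrete, here is a hypothetical trace of what the
generator now writes for a two-line input. The exact token kinds are
illustrative; the filename token, for instance, would be whatever kind
LexIncludeFilename produces:

    // Input:
    //   #include <foo.h>
    //   int x;
    //
    // Emitted PTH token stream (sketch):
    //   hash, identifier("include"), filename("<foo.h>"),
    //   eom,   <- synthesized here: 'int' starts a new line while
    //             ParsingPreprocessorDirective is still true; the eom
    //             copies 'int's position but clears StartOfLine
    //   "int", identifier("x"), semi, ..., eof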
@@ -185,27 +205,37 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
      Tok.setIdentifierInfo(II);
      tok::PPKeywordKind K = II->getPPKeywordID();

      if (K == tok::pp_include || K == tok::pp_import ||
          K == tok::pp_include_next) {

      assert(K != tok::pp_not_keyword);
      ParsingPreprocessorDirective = true;

      switch (K) {
      default:
        break;
      case tok::pp_include:
      case tok::pp_import:
      case tok::pp_include_next: {
        // Save the 'include' token.
        EmitToken(Out, Tok, SMgr, idcount, IM);

        // Lex the next token as an include string.
        L.setParsingPreprocessorDirective(true);
        L.LexIncludeFilename(Tok);
        L.setParsingPreprocessorDirective(false);

        assert(!Tok.isAtStartOfLine());
        if (Tok.is(tok::identifier))
          Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));

        break;
      }
      else if (K == tok::pp_if || K == tok::pp_ifdef || K == tok::pp_ifndef) {
      case tok::pp_if:
      case tok::pp_ifdef:
      case tok::pp_ifndef: {
        // Add an entry for '#if' and friends. We initially set the target
        // index to 0. This will get backpatched when we hit #endif.
        PPStartCond.push_back(PPCond.size());
        PPCond.push_back(std::make_pair(HashOff, 0U));
        break;
      }
      else if (K == tok::pp_endif) {
      case tok::pp_endif: {
        // Add an entry for '#endif'. We set the target table index to itself.
        // This will later be set to zero when emitting to the PTH file. We
        // use 0 for uninitialized indices because that is easier to debug.
@@ -218,9 +248,11 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
        PPStartCond.pop_back();
        // Add the new entry to PPCond.
        PPCond.push_back(std::make_pair(HashOff, index));
        break;
      }
      else if (K == tok::pp_elif || K == tok::pp_else) {
        // Add an entry for '#elif' or '#else'.
      case tok::pp_elif:
      case tok::pp_else: {
        // Add an entry for #elif or #else.
        // This serves as both a closing and opening of a conditional block.
        // This means that its entry will get backpatched later.
        unsigned index = PPCond.size();
@@ -233,6 +265,8 @@ LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
        // Now add '#elif' as a new block opening.
        PPCond.push_back(std::make_pair(HashOff, 0U));
        PPStartCond.push_back(index);
        break;
      }
      }
    }
  }
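Pulling the switch cases together, here is a hypothetical walk-through of how
the PPCond side table is built and backpatched for one conditional chain. It
is consistent with the code above but reconstructed from these partial hunks;
h1..h4 stand for the HashOff file offsets of each '#', entries are
(HashOff, targetIndex) pairs, and indices are positions in PPCond:

    // #if A    -> open entry 0:    PPCond = {(h1,0)}            PPStartCond = {0}
    // #elif B  -> backpatch 0->1, open entry 1:
    //             PPCond = {(h1,1),(h2,0)}                      PPStartCond = {1}
    // #else    -> backpatch 1->2, open entry 2:
    //             PPCond = {(h1,1),(h2,2),(h3,0)}               PPStartCond = {2}
    // #endif   -> entry 3 targets itself, backpatch 2->3:
    //             PPCond = {(h1,1),(h2,2),(h3,3),(h4,3)}        PPStartCond = {}
    //
    // Each open block thus points at the next '#' in the chain, which is what
    // lets SkipBlock hop from directive to directive without lexing the
    // skipped tokens.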
@@ -51,9 +51,7 @@ class PTHLexer : public PreprocessorLexer {
  /// PTHMgr - The PTHManager object that created this PTHLexer.
  PTHManager& PTHMgr;

  Token LastFetched;
  Token EofToken;
  bool NeedsFetching;

public:
@@ -95,20 +93,7 @@ public:
  /// SkipBlock - Used by Preprocessor to skip the current conditional block.
  bool SkipBlock();

private:
  /// AtLastToken - Returns true if the PTHLexer is at the last token.
  bool AtLastToken() {
    Token T = GetToken();
    return T.is(tok::eof) ? EofToken = T, true : false;
  }

  /// GetToken - Returns the next token. This method does not advance the
  /// PTHLexer to the next token.
  Token GetToken();

  /// AdvanceToken - Advances the PTHLexer to the next token.
  void AdvanceToken() { NeedsFetching = true; }

private:
  bool LexEndOfFile(Token &Result);
};
@@ -339,6 +339,13 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {
      // If the #if block wasn't entered then enter the #else block now.
      if (!CondInfo.FoundNonSkip) {
        CondInfo.FoundNonSkip = true;

        // Consume the eom token.
        CurPTHLexer->ParsingPreprocessorDirective = true;
        LexUnexpandedToken(Tok);
        assert(Tok.is(tok::eom));
        CurPTHLexer->ParsingPreprocessorDirective = false;

        break;
      }
@@ -28,129 +28,6 @@ using namespace clang;

#define DISK_TOKEN_SIZE (2+3*4)

PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   const char* ppcond, PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
  // Make sure the EofToken is completely clean.
  EofToken.startToken();
}

Token PTHLexer::GetToken() {
  // Read the next token, or if we haven't advanced yet, get the last
  // token read.
  if (NeedsFetching) {
    NeedsFetching = false;
    ReadToken(LastFetched);
  }

  Token Tok = LastFetched;

  // If we are in raw mode, zero out identifier pointers. This is
  // needed for 'pragma poison'. Note that this requires that the Preprocessor
  // can go back to the original source when it calls getSpelling().
  if (LexingRawMode && Tok.is(tok::identifier))
    Tok.setIdentifierInfo(0);

  return Tok;
}

void PTHLexer::Lex(Token& Tok) {
LexNextToken:
  Tok = GetToken();

  if (AtLastToken()) {
    Preprocessor *PPCache = PP;

    if (LexEndOfFile(Tok))
      return;

    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    return PPCache->Lex(Tok);
  }

  // Don't advance to the next token yet. Check if we are at the
  // start of a new line and we're processing a directive. If so, we
  // consume this token twice, once as a tok::eom.
  if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
    ParsingPreprocessorDirective = false;
    Tok.setKind(tok::eom);
    MIOpt.ReadToken();
    return;
  }

  // Advance to the next token.
  AdvanceToken();

  if (Tok.is(tok::hash)) {
    if (Tok.isAtStartOfLine()) {
      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
      if (!LexingRawMode) {
        PP->HandleDirective(Tok);

        if (PP->isCurrentLexer(this))
          goto LexNextToken;

        return PP->Lex(Tok);
      }
    }
  }

  MIOpt.ReadToken();

  if (Tok.is(tok::identifier)) {
    if (LexingRawMode) return;
    return PP->HandleIdentifier(Tok);
  }
}

bool PTHLexer::LexEndOfFile(Token &Tok) {

  if (ParsingPreprocessorDirective) {
    ParsingPreprocessorDirective = false;
    Tok.setKind(tok::eom);
    MIOpt.ReadToken();
    return true; // Have a token.
  }

  if (LexingRawMode) {
    MIOpt.ReadToken();
    return true; // Have an eof token.
  }

  // FIXME: Issue diagnostics similar to Lexer.
  return PP->HandleEndOfFile(Tok, false);
}

void PTHLexer::setEOF(Token& Tok) {
  assert(!EofToken.is(tok::eof));
  Tok = EofToken;
}

void PTHLexer::DiscardToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");

  // Skip tokens by only peeking at their token kind and the flags.
  // We don't need to actually reconstruct full tokens from the token buffer.
  // This saves some copies and it also reduces IdentifierInfo* lookup.
  const char* p = CurPtr;
  while (1) {
    // Read the token kind. Are we at the end of the file?
    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    if (x == tok::eof) break;

    // Read the token flags. Are we at the start of the next line?
    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    if (y & Token::StartOfLine) break;

    // Skip to the next token.
    p += DISK_TOKEN_SIZE;
  }

  CurPtr = p;
}

//===----------------------------------------------------------------------===//
// Utility methods for reading from the mmap'ed PTH file.
//===----------------------------------------------------------------------===//
@@ -167,6 +44,150 @@ static inline uint32_t Read32(const char*& data) {
  return V;
}

//===----------------------------------------------------------------------===//
// PTHLexer methods.
//===----------------------------------------------------------------------===//

PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   const char* ppcond, PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {}

void PTHLexer::Lex(Token& Tok) {
LexNextToken:

  // Read the token.
  // FIXME: Setting the flags directly should obviate this step.
  Tok.startToken();

  // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
  // store back into the instance variable.
  const char *CurPtrShadow = CurPtr;

  // Read the type of the token.
  Tok.setKind((tok::TokenKind) Read8(CurPtrShadow));

  // Set flags. This is gross, since we are really setting multiple flags.
  Tok.setFlag((Token::TokenFlags) Read8(CurPtrShadow));

  // Set the IdentifierInfo* (if any).
  Tok.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));

  // Set the SourceLocation. Since all tokens are constructed using a
  // raw lexer, they will all be offset from the same FileID.
  Tok.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));

  // Finally, read and set the length of the token.
  Tok.setLength(Read32(CurPtrShadow));

  CurPtr = CurPtrShadow;

  if (Tok.is(tok::eof)) {
    // Save the end-of-file token.
    EofToken = Tok;

    Preprocessor *PPCache = PP;

    if (LexEndOfFile(Tok))
      return;

    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
    return PPCache->Lex(Tok);
  }

  MIOpt.ReadToken();

  if (Tok.is(tok::eom)) {
    ParsingPreprocessorDirective = false;
    return;
  }

#if 0
  SourceManager& SM = PP->getSourceManager();
  SourceLocation L = Tok.getLocation();

  static const char* last = 0;
  const char* next = SM.getContentCacheForLoc(L)->Entry->getName();
  if (next != last) {
    last = next;
    llvm::cerr << next << '\n';
  }

  llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " <<
    SM.getLogicalColumnNumber(L) << '\n';
#endif

  if (Tok.is(tok::hash)) {
    if (Tok.isAtStartOfLine()) {
      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
      if (!LexingRawMode) {
        PP->HandleDirective(Tok);

        if (PP->isCurrentLexer(this))
          goto LexNextToken;

        return PP->Lex(Tok);
      }
    }
  }

  if (Tok.is(tok::identifier)) {
    if (LexingRawMode) {
      Tok.setIdentifierInfo(0);
      return;
    }

    return PP->HandleIdentifier(Tok);
  }

  assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective);
}

// FIXME: This method can just be inlined into Lex().
bool PTHLexer::LexEndOfFile(Token &Tok) {
  assert(!ParsingPreprocessorDirective);
  assert(!LexingRawMode);

  // FIXME: Issue diagnostics similar to Lexer.
  return PP->HandleEndOfFile(Tok, false);
}

// FIXME: We can just grab the last token instead of storing a copy
// into EofToken.
void PTHLexer::setEOF(Token& Tok) {
  assert(!EofToken.is(tok::eof));
  Tok = EofToken;
}

void PTHLexer::DiscardToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");

  // We assume that if the preprocessor wishes to discard to the end of
  // the line that it also means to end the current preprocessor directive.
  ParsingPreprocessorDirective = false;

  // Skip tokens by only peeking at their token kind and the flags.
  // We don't need to actually reconstruct full tokens from the token buffer.
  // This saves some copies and it also reduces IdentifierInfo* lookup.
  const char* p = CurPtr;
  while (1) {
    // Read the token kind. Are we at the end of the file?
    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
    if (x == tok::eof) break;

    // Read the token flags. Are we at the start of the next line?
    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
    if (y & Token::StartOfLine) break;

    // Skip to the next token.
    p += DISK_TOKEN_SIZE;
  }

  CurPtr = p;
}

/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool PTHLexer::SkipBlock() {
  assert(CurPPCondPtr && "No cached PP conditional information.");
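Only the tail of Read32 is visible at the top of this hunk. A minimal sketch
of the two helpers the new Lex() leans on might look like the following; the
little-endian byte order is an assumption here, not something this diff shows:

    static inline uint8_t Read8(const char*& data) {
      return (uint8_t) *data++;
    }

    static inline uint32_t Read32(const char*& data) {
      // Reassemble four bytes, low byte first (assumed byte order).
      uint32_t V = ((uint32_t)(uint8_t) data[0])
                 | (((uint32_t)(uint8_t) data[1]) << 8)
                 | (((uint32_t)(uint8_t) data[2]) << 16)
                 | (((uint32_t)(uint8_t) data[3]) << 24);
      data += 4;
      return V;
    }

Taking the pointer by reference lets each call advance the cursor as a side
effect, which is what allows Lex() to decode one record with a straight-line
sequence of reads.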
@@ -225,7 +246,6 @@ bool PTHLexer::SkipBlock() {
  // By construction NextIdx will be zero if this is a #endif. This is useful
  // to know to obviate lexing another token.
  bool isEndif = NextIdx == 0;
  NeedsFetching = true;

  // This case can occur when we see something like this:
  //
@@ -240,7 +260,7 @@ bool PTHLexer::SkipBlock() {
    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
    // Did we reach a #endif? If so, go ahead and consume that token as well.
    if (isEndif)
      CurPtr += DISK_TOKEN_SIZE;
      CurPtr += DISK_TOKEN_SIZE*2;
    else
      LastHashTokPtr = HashEntryI;
@@ -253,20 +273,13 @@ bool PTHLexer::SkipBlock() {
    // Update the location of the last observed '#'. This is useful if we
    // are skipping multiple blocks.
    LastHashTokPtr = CurPtr;

#ifndef DEBUG
    // In a debug build we should verify that the token is really a '#' that
    // appears at the start of the line.
    Token Tok;
    ReadToken(Tok);
    assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
#else
    // In a full release build we can just skip the token entirely.
    CurPtr += DISK_TOKEN_SIZE;
#endif

    // Skip the '#' token.
    assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
    CurPtr += DISK_TOKEN_SIZE;

    // Did we reach a #endif? If so, go ahead and consume that token as well.
    if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
    if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }

    return isEndif;
  }
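The change from DISK_TOKEN_SIZE to DISK_TOKEN_SIZE*2 in the two isEndif paths
above reads as a direct consequence of the fixed-width record format: skipping
is plain pointer arithmetic, and with this commit a #endif is followed by an
embedded eom record that must be stepped over as well. A sketch of the idea,
with the helper name being illustrative rather than part of the patch:

    // Advance past n fixed-width token records in the mmap'ed buffer.
    static const char* SkipDiskTokens(const char* p, unsigned n) {
      return p + n * DISK_TOKEN_SIZE;
    }

    // After consuming the '#', a #endif needs two more records skipped:
    // the 'endif' identifier and the newly embedded eom.
    // CurPtr = SkipDiskTokens(CurPtr, 2);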
@@ -286,38 +299,6 @@ SourceLocation PTHLexer::getSourceLocation() {
  return SourceLocation::getFileLoc(FileID, offset);
}

//===----------------------------------------------------------------------===//
// Token reconstruction from the PTH file.
//===----------------------------------------------------------------------===//

void PTHLexer::ReadToken(Token& T) {
  // Clear the token.
  // FIXME: Setting the flags directly should obviate this step.
  T.startToken();

  // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
  // store back into the instance variable.
  const char *CurPtrShadow = CurPtr;

  // Read the type of the token.
  T.setKind((tok::TokenKind) Read8(CurPtrShadow));

  // Set flags. This is gross, since we are really setting multiple flags.
  T.setFlag((Token::TokenFlags) Read8(CurPtrShadow));

  // Set the IdentifierInfo* (if any).
  T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));

  // Set the SourceLocation. Since all tokens are constructed using a
  // raw lexer, they will all be offset from the same FileID.
  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));

  // Finally, read and set the length of the token.
  T.setLength(Read32(CurPtrShadow));

  CurPtr = CurPtrShadow;
}

//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//