Added a PTH optimization to avoid processing entire blocks of tokens that appear in skipped preprocessor blocks. This improves PTH speed by 6%. The code for this optimization is itself not very optimized and will get cleaned up.
llvm-svn: 60956
parent: ee87e0bd31
commit: 56572ab9e9
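For orientation before reading the diff: the fast skipping relies on a PP-conditional side table stored in the PTH file. The sketch below is inferred from the reader code in this patch (PTHLexer::SkipBlock and PTHManager::CreateLexer), not from the PTH writer itself; the names PPCondEntry and FindEntryFor are purely illustrative. Each table entry appears to be a pair of 32-bit values: the offset of a '#' directive token in the token buffer, and the index of the entry to jump to (zero when the entry is itself a #endif). In the file, the table is preceded by a 32-bit entry count, and CreateLexer treats a count of zero as "no table".

// Sketch only: layout and scan inferred from PTHLexer::SkipBlock() in this patch.
#include <cassert>
#include <cstdint>
#include <cstring>

struct PPCondEntry {      // hypothetical name for one side-table entry
  uint32_t TokOffset;     // offset of a '#' token within the token buffer
  uint32_t TargetIdx;     // entry index of the matching #else/#elif/#endif,
                          //  or 0 if this entry is itself a #endif
};

// Walk the table until we reach the entry for the '#' token we are sitting on,
// mirroring the do/while scan in SkipBlock().  Entries are assumed to be
// sorted by token offset.
static PPCondEntry FindEntryFor(const char* PPCondTable, const char* TokBuf,
                                const char* LastHashTokPtr) {
  const char* P = PPCondTable;
  PPCondEntry E;
  do {
    std::memcpy(&E.TokOffset, P, sizeof(uint32_t)); P += sizeof(uint32_t);
    std::memcpy(&E.TargetIdx, P, sizeof(uint32_t)); P += sizeof(uint32_t);
  } while (TokBuf + E.TokOffset < LastHashTokPtr);
  assert(TokBuf + E.TokOffset == LastHashTokPtr &&
         "No PP-cond entry found for '#'");
  return E;
}

Given such an entry, SkipBlock() in the diff jumps to PPCond + TargetIdx * (2 * sizeof(uint32_t)), reads the destination pair, and uses a zero index at the destination to recognize a #endif without lexing another token.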
@@ -32,6 +32,15 @@ class PTHLexer : public PreprocessorLexer {
  /// LastHashTokPtr - Pointer into TokBuf of the last processed '#'
  /// token that appears at the start of a line.
  const char* LastHashTokPtr;

  /// PPCond - Pointer to a side table in the PTH file that provides a
  /// concise summary of the preprocessor conditional block structure.
  /// This is used to perform quick skipping of conditional blocks.
  const char* PPCond;

  /// CurPPCondPtr - Pointer inside PPCond that refers to the next entry
  /// to process when doing quick skipping of preprocessor blocks.
  const char* CurPPCondPtr;

  PTHLexer(const PTHLexer&);        // DO NOT IMPLEMENT
  void operator=(const PTHLexer&);  // DO NOT IMPLEMENT
@@ -50,7 +59,7 @@ public:

  /// Create a PTHLexer for the specified token stream.
  PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
           PTHManager& PM);
           const char* ppcond, PTHManager& PM);

  ~PTHLexer() {}
@@ -77,17 +86,11 @@ public:
  /// getSourceLocation - Return a source location for the token in
  /// the current file.
  SourceLocation getSourceLocation() { return GetToken().getLocation(); }

  /// SkipBlock - Used by Preprocessor to skip the current conditional block.
  bool SkipBlock();

private:

  /// SkipToToken - Skip to the token at the specified offset in TokBuf.
  void SkipToToken(unsigned offset) {
    const char* NewPtr = TokBuf + offset;
    assert(NewPtr > CurPtr && "SkipToToken should not go backwards!");
    NeedsFetching = true;
    CurPtr = NewPtr;
  }

private:
  /// AtLastToken - Returns true if the PTHLexer is at the last token.
  bool AtLastToken() {
    Token T = GetToken();
@@ -560,6 +560,10 @@ private:
  void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
                                    bool FoundNonSkipPortion, bool FoundElse);

  /// PTHSkipExcludedConditionalBlock - A fast PTH version of
  /// SkipExcludedConditionalBlock.
  void PTHSkipExcludedConditionalBlock();

  /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
  /// may occur after a #if or #elif directive and return it as a bool. If the
  /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
@@ -122,6 +122,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
  CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
                                   FoundNonSkipPortion, FoundElse);

  if (CurPTHLexer) {
    PTHSkipExcludedConditionalBlock();
    return;
  }

  // Enter raw mode to disable identifier lookup (and thus macro expansion),
  // disabling warnings, etc.
  CurPPLexer->LexingRawMode = true;
@@ -291,6 +296,79 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
  CurPPLexer->LexingRawMode = false;
}

void Preprocessor::PTHSkipExcludedConditionalBlock() {

  while (1) {
    assert(CurPTHLexer);
    assert(CurPTHLexer->LexingRawMode == false);

    // Skip to the next '#else', '#elif', or '#endif'.
    if (CurPTHLexer->SkipBlock()) {
      // We have reached an #endif. Both the '#' and 'endif' tokens
      // have been consumed by the PTHLexer. Just pop off the condition level.
      PPConditionalInfo CondInfo;
      bool InCond = CurPTHLexer->popConditionalLevel(CondInfo);
      InCond = InCond;  // Silence warning in no-asserts mode.
      assert(!InCond && "Can't be skipping if not in a conditional!");
      break;
    }

    // We have reached a '#else' or '#elif'. Lex the next token to get
    // the directive flavor.
    Token Tok;
    LexUnexpandedToken(Tok);

    // We can actually look up the IdentifierInfo here since we aren't in
    // raw mode.
    tok::PPKeywordKind K = Tok.getIdentifierInfo()->getPPKeywordID();

    if (K == tok::pp_else) {
      // #else: Enter the else condition. We aren't in a nested condition
      // since we skip those. We're always in the one matching the last
      // block we skipped.
      PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel();
      // Note that we've seen a #else in this conditional.
      CondInfo.FoundElse = true;

      // If the #if block wasn't entered then enter the #else block now.
      if (!CondInfo.FoundNonSkip) {
        CondInfo.FoundNonSkip = true;
        break;
      }

      // Otherwise skip this block.
      continue;
    }

    assert(K == tok::pp_elif);
    PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel();

    // If this is a #elif with a #else before it, report the error.
    if (CondInfo.FoundElse)
      Diag(Tok, diag::pp_err_elif_after_else);

    // If this is in a skipping block or if we've already handled this #if
    // block, don't bother parsing the condition. We just skip this block.
    if (CondInfo.FoundNonSkip)
      continue;

    // Evaluate the condition of the #elif.
    IdentifierInfo *IfNDefMacro = 0;
    CurPTHLexer->ParsingPreprocessorDirective = true;
    bool ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
    CurPTHLexer->ParsingPreprocessorDirective = false;

    // If this condition is true, enter it!
    if (ShouldEnter) {
      CondInfo.FoundNonSkip = true;
      break;
    }

    // Otherwise, skip this block and go to the next one.
    continue;
  }
}

/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
/// return null on failure. isAngled indicates whether the file reference is
/// for system #include's or not (i.e. using <> instead of "").
@@ -26,11 +26,12 @@

using namespace clang;

#define DISK_TOKEN_SIZE (2+3*4)

PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
                   PTHManager& PM)
                   const char* ppcond, PTHManager& PM)
  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
    PTHMgr(PM),
    NeedsFetching(true) {
    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
  // Make sure the EofToken is completely clean.
  EofToken.startToken();
}
@@ -82,15 +83,16 @@ LexNextToken:
  AdvanceToken();

  if (Tok.is(tok::hash)) {
    if (Tok.isAtStartOfLine() && !LexingRawMode) {
      LastHashTokPtr = CurPtr;

      PP->HandleDirective(Tok);
    if (Tok.isAtStartOfLine()) {
      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
      if (!LexingRawMode) {
        PP->HandleDirective(Tok);

      if (PP->isCurrentLexer(this))
        goto LexNextToken;

      return PP->Lex(Tok);
        if (PP->isCurrentLexer(this))
          goto LexNextToken;

        return PP->Lex(Tok);
      }
    }
  }
@@ -156,6 +158,82 @@ static inline uint32_t Read32(const char*& data) {
  return V;
}

/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool PTHLexer::SkipBlock() {
  assert(CurPPCondPtr && "No cached PP conditional information.");
  assert(LastHashTokPtr && "No known '#' token.");

  const char* Next = 0;
  uint32_t Offset;
  uint32_t TableIdx;

  do {
    Offset = Read32(CurPPCondPtr);
    TableIdx = Read32(CurPPCondPtr);
    Next = TokBuf + Offset;
  }
  while (Next < LastHashTokPtr);
  assert(Next == LastHashTokPtr && "No PP-cond entry found for '#'");
  assert(TableIdx && "No jumping from #endifs.");

  // Update our side-table iterator.
  const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
  assert(NextPPCondPtr >= CurPPCondPtr);
  CurPPCondPtr = NextPPCondPtr;

  // Read where we should jump to.
  Next = TokBuf + Read32(NextPPCondPtr);
  uint32_t NextIdx = Read32(NextPPCondPtr);

  // By construction NextIdx will be zero if this is a #endif. This is useful
  // to know because it lets us avoid lexing another token.
  bool isEndif = NextIdx == 0;
  NeedsFetching = true;

  // This case can occur when we see something like this:
  //
  //  #if ...
  //   /* a comment or nothing */
  //  #elif
  //
  // If we are skipping the first #if block it will be the case that CurPtr
  // already points at the 'elif'. Just return.

  if (CurPtr > Next) {
    assert(CurPtr == Next + DISK_TOKEN_SIZE);
    // Did we reach an #endif? If so, go ahead and consume that token as well.
    if (isEndif)
      CurPtr += DISK_TOKEN_SIZE;
    else
      LastHashTokPtr = Next;

    return isEndif;
  }

  // Otherwise, we need to advance. Update CurPtr to point to the '#' token.
  CurPtr = Next;

  // Update the location of the last observed '#'. This is useful if we
  // are skipping multiple blocks.
  LastHashTokPtr = CurPtr;

#ifndef NDEBUG
  // In a debug build we should verify that the token is really a '#' that
  // appears at the start of the line.
  Token Tok;
  ReadToken(Tok);
  assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
#else
  // In a full release build we can just skip the token entirely.
  CurPtr += DISK_TOKEN_SIZE;
#endif

  // Did we reach an #endif? If so, go ahead and consume that token as well.
  if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }

  return isEndif;
}

//===----------------------------------------------------------------------===//
// Token reconstruction from the PTH file.
//===----------------------------------------------------------------------===//
@@ -179,7 +257,7 @@ void PTHLexer::ReadToken(Token& T) {
  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr)));

  // Finally, read and set the length of the token.
  T.setLength(Read32(CurPtr));
}

//===----------------------------------------------------------------------===//
@@ -364,6 +442,13 @@ PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {

  // Compute the offset of the token data within the buffer.
  const char* data = Buf->getBufferStart() + FileData.getTokenOffset();

  // Get the location of the pp-conditional table.
  const char* ppcond = Buf->getBufferStart() + FileData.getPPCondOffset();
  uint32_t len = Read32(ppcond);
  if (len == 0) ppcond = 0;

  assert(data < Buf->getBufferEnd());
  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
                      *this);
}