Implement the automaton for recognizing files with controlling macros.

llvm-svn: 38646
2006-07-04 07:11:10 +00:00 · 2006-07-04 07:11:10 +00:00 · 371ac8a9b7
parent d7dfa57efd
commit 371ac8a9b7
6 changed files with 212 additions and 15 deletions
--- a/clang/Lex/Lexer.cpp
+++ b/clang/Lex/Lexer.cpp
@ -979,6 +979,8 @@ LexNextToken:
    goto LexNextToken;   // GCC isn't tail call eliminating.

  case 'L':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
    Char = getCharAndSize(CurPtr, SizeTmp);

    // Wide string literal.
@ -1000,20 +1002,28 @@ LexNextToken:
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
  case 'v': case 'w': case 'x': case 'y': case 'z':
  case '_':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
    return LexIdentifier(Result, CurPtr);
    
  // C99 6.4.4.1: Integer Constants.
  // C99 6.4.4.2: Floating Constants.
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
    return LexNumericConstant(Result, CurPtr);
    
  // C99 6.4.4: Character Constants.
  case '\'':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
    return LexCharConstant(Result, CurPtr);

  // C99 6.4.5: String Literals.
  case '"':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
    return LexStringLiteral(Result, CurPtr);

  // C99 6.4.6: Punctuators.
@ -1041,6 +1051,9 @@ LexNextToken:
  case '.':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char >= '0' && Char <= '9') {
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
+
      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
    } else if (Features.CPlusPlus && Char == '*') {
      Result.SetKind(tok::periodstar);
@ -1333,6 +1346,8 @@ LexNextToken:
      break;
    } else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
      Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
      return LexIdentifier(Result, CurPtr);
    }
    
@ -1341,6 +1356,9 @@ LexNextToken:
    goto LexNextToken;   // GCC isn't tail call eliminating.
  }
  
+  // Notify MIOpt that we read a non-whitespace/non-comment token.
+  MIOpt.ReadToken();
+
  // Update the location of token as well as BufferPtr.
  FormTokenWithChars(Result, CurPtr);
 }
--- a/clang/Lex/Pragma.cpp
+++ b/clang/Lex/Pragma.cpp
@ -74,6 +74,9 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, LexerToken &Tok) {
 void Preprocessor::HandlePragmaDirective() {
  ++NumPragma;
  
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+  
  // Invoke the first level of pragma handlers which reads the namespace id.
  LexerToken Tok;
  PragmaHandlers->HandlePragma(*this, Tok);
--- a/clang/Lex/Preprocessor.cpp
+++ b/clang/Lex/Preprocessor.cpp
@ -739,6 +739,14 @@ void Preprocessor::HandleEndOfFile(LexerToken &Result, bool isEndOfMacro) {
    return;
  }
  
+  // See if this file had a controlling macro.
+  if (CurLexer) {  // Not ending a macro...
+    if (const IdentifierTokenInfo *ControllingMacro = 
+          CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
+      ;
+    }
+  }
+  
  // If this is a #include'd file, pop it off the include stack and continue
  // lexing the #includer file.
  if (!IncludeMacroStack.empty()) {
@ -1049,6 +1057,11 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
  
  ++NumDirectives;
  
+  // We are about to read a token.  For the multiple-include optimization FA to
+  // work, we have to remember if we had read any tokens *before* this 
+  // pp-directive.
+  bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal();
+  
  // Read the next token, the directive flavor.
  LexUnexpandedToken(Result);
  
@ -1059,6 +1072,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {

 #if 0
  case tok::numeric_constant:
+    MIOpt.ReadDirective();
    // FIXME: implement # 7 line numbers!
    break;
 #endif
@ -1073,7 +1087,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
    switch (Result.getIdentifierInfo()->getNameLength()) {
    case 4:
      if (Directive[0] == 'l' && !strcmp(Directive, "line"))
-        ;  // FIXME: implement #line
+        CurLexer->MIOpt.ReadDirective();  // FIXME: implement #line
      if (Directive[0] == 'e' && !strcmp(Directive, "elif"))
        return HandleElifDirective(Result);
      if (Directive[0] == 's' && !strcmp(Directive, "sccs"))
@ -1083,7 +1097,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
      if (Directive[0] == 'e' && !strcmp(Directive, "endif"))
        return HandleEndifDirective(Result);
      if (Directive[0] == 'i' && !strcmp(Directive, "ifdef"))
-        return HandleIfdefDirective(Result, false);
+        return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
      if (Directive[0] == 'u' && !strcmp(Directive, "undef"))
        return HandleUndefDirective(Result);
      if (Directive[0] == 'e' && !strcmp(Directive, "error"))
@ -1095,7 +1109,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
      if (Directive[0] == 'd' && !strcmp(Directive, "define"))
        return HandleDefineDirective(Result);
      if (Directive[0] == 'i' && !strcmp(Directive, "ifndef"))
-        return HandleIfdefDirective(Result, true);
+        return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
      if (Directive[0] == 'i' && !strcmp(Directive, "import"))
        return HandleImportDirective(Result);
      if (Directive[0] == 'p' && !strcmp(Directive, "pragma"))
@ -1128,9 +1142,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
  Diag(Result, diag::err_pp_invalid_directive);
  
  // Read the rest of the PP line.
-  do {
-    Lex(Result);
-  } while (Result.getKind() != tok::eom);
+  DiscardUntilEndOfDirective();
  
  // Okay, we're done parsing the directive.
 }
@ -1151,8 +1163,13 @@ void Preprocessor::HandleUserDiagnosticDirective(LexerToken &Tok,
 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
 ///
 void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) {
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+  
+  // Yes, this directive is an extension.
  Diag(Tok, diag::ext_pp_ident_directive);
  
+  // Read the string argument.
  LexerToken StrTok;
  Lex(StrTok);
  
@ -1179,6 +1196,10 @@ void Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
                                          const DirectoryLookup *LookupFrom,
                                          bool isImport) {
  ++NumIncluded;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
  LexerToken FilenameTok;
  std::string Filename = CurLexer->LexIncludeFilename(FilenameTok);
  
@ -1225,8 +1246,7 @@ void Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
  }

  // Look up the file, create a File ID for it.
-  unsigned FileID = 
-    SourceMgr.createFileID(File, FilenameTok.getLocation());
+  unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation());
  if (FileID == 0)
    return Diag(FilenameTok, diag::err_pp_file_not_found);

@ -1276,6 +1296,10 @@ void Preprocessor::HandleImportDirective(LexerToken &ImportTok) {
 ///
 void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
  ++NumDefined;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
  LexerToken MacroNameTok;
  ReadMacroName(MacroNameTok, true);
  
@ -1345,6 +1369,10 @@ void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
 ///
 void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
  ++NumUndefined;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
  LexerToken MacroNameTok;
  ReadMacroName(MacroNameTok, true);
  
@ -1375,12 +1403,15 @@ void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
 //===----------------------------------------------------------------------===//

 /// HandleIfdefDirective - Implements the #ifdef/#ifndef directive.  isIfndef is
-/// true when this is a #ifndef directive.
+/// true when this is a #ifndef directive.  ReadAnyTokensBeforeDirective is true
+/// if any tokens have been returned or pp-directives activated before this
+/// #ifndef has been lexed.
 ///
-void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
+void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef,
+                                        bool ReadAnyTokensBeforeDirective) {
  ++NumIf;
  LexerToken DirectiveTok = Result;
-  
+
  LexerToken MacroNameTok;
  ReadMacroName(MacroNameTok);
  
@ -1389,7 +1420,14 @@ void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
    return;
  
  // Check to see if this is the last token on the #if[n]def line.
-  CheckEndOfDirective("#ifdef");
+  CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef");
+  
+  // If this is the start of a top-level #ifndef, inform MIOpt.
+  if (!ReadAnyTokensBeforeDirective &&
+      CurLexer->getConditionalStackDepth() == 0) {
+    assert(isIfndef && "#ifdef shouldn't reach here");
+    CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
+  }
  
  MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo();

@ -1413,6 +1451,11 @@ void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
 ///
 void Preprocessor::HandleIfDirective(LexerToken &IfToken) {
  ++NumIf;
+  
+  // FIXME: Detect "#if !defined(X)" for the MIOpt.
+  CurLexer->MIOpt.ReadDirective();
+
+  // Parse and evaluate the conditional expression.
  bool ConditionalTrue = EvaluateDirectiveExpression();
  
  // Should we include the stuff contained by this directive?
@ -1431,6 +1474,7 @@ void Preprocessor::HandleIfDirective(LexerToken &IfToken) {
 ///
 void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
  ++NumEndif;
+  
  // Check that this is the whole directive.
  CheckEndOfDirective("#endif");
  
@ -1440,6 +1484,10 @@ void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
    return Diag(EndifToken, diag::err_pp_endif_without_if);
  }
  
+  // If this #endif closes a top-level conditional, inform MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.ExitTopLevelConditional();
+  
  assert(!CondInfo.WasSkipping && !isSkipping() &&
         "This code should only be reachable in the non-skipping case!");
 }
@ -1447,12 +1495,17 @@ void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {

 void Preprocessor::HandleElseDirective(LexerToken &Result) {
  ++NumElse;
+  
  // #else directive in a non-skipping conditional... start skipping.
  CheckEndOfDirective("#else");
  
  PPConditionalInfo CI;
  if (CurLexer->popConditionalLevel(CI))
    return Diag(Result, diag::pp_err_else_without_if);
+  
+  // If this is a top-level #else, inform the MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.FoundTopLevelElse();

  // If this is a #else with a #else before it, report the error.
  if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
@ -1465,6 +1518,7 @@ void Preprocessor::HandleElseDirective(LexerToken &Result) {

 void Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
  ++NumElse;
+  
  // #elif directive in a non-skipping conditional... start skipping.
  // We don't care what the condition is, because we will always skip it (since
  // the block immediately before it was included).
@ -1474,6 +1528,10 @@ void Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
  if (CurLexer->popConditionalLevel(CI))
    return Diag(ElifToken, diag::pp_err_elif_without_if);
  
+  // If this is a top-level #elif, inform the MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.FoundTopLevelElse();
+  
  // If this is a #elif with a #else before it, report the error.
  if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);

--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@ -15,6 +15,7 @@
 #define LLVM_CLANG_LEXER_H

 #include "clang/Lex/LexerToken.h"
+#include "clang/Lex/MultipleIncludeOpt.h"
 #include <string>
 #include <vector>

@ -66,7 +67,11 @@ class Lexer {
  bool ParsingFilename;          // True after #include: turn <xx> into string.
  
  // Context that changes as the file is lexed.
-    
+  
+  /// MIOpt - This is a state machine that detects the #ifndef-wrapping a file 
+  /// idiom for the multiple-include optimization.
+  MultipleIncludeOpt MIOpt;
+  
  /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks
  /// we are currently in.
  std::vector<PPConditionalInfo> ConditionalStack;
@ -114,7 +119,8 @@ public:
      IsAtStartOfLine = false;
    }
   
-    // Get a token.
+    // Get a token.  Note that this may delete the current lexer if the end of
+    // file is reached.
    LexTokenInternal(Result);
  }
  
--- a/clang/include/clang/Lex/MultipleIncludeOpt.h
+++ b/clang/include/clang/Lex/MultipleIncludeOpt.h
@ -0,0 +1,111 @@
+//===--- MultipleIncludeOpt.h - Header Multiple-Include Optzn ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the MultipleIncludeOpt interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MULTIPLEINCLUDEOPT_H
+#define LLVM_CLANG_MULTIPLEINCLUDEOPT_H
+
+namespace llvm {
+namespace clang {
+class IdentifierTokenInfo;
+
+/// MultipleIncludeOpt - This class implements the simple state machine that the
+/// Lexer class uses to detect files subject to the 'multiple-include'
+/// optimization.  The public methods in this class are triggered by various
+/// events that occur when a file is lexed, and after the entire file is lexed,
+/// information about which macro (if any) controls the header is returned.
+class MultipleIncludeOpt {
+  /// ReadAnyTokens - This is set to false when a file is first opened and true
+  /// any time a token is returned to the client or a (non-multiple-include)
+  /// directive is parsed.  When the final #endif is parsed this is reset back
+  /// to false, that way any tokens before the first #ifndef or after the last
+  /// #endif can be easily detected.
+  bool ReadAnyTokens;
+  
+  /// TheMacro - The controlling macro for a file, or null if none is known.
+  ///
+  const IdentifierTokenInfo *TheMacro;
+public:
+  MultipleIncludeOpt() : ReadAnyTokens(false), TheMacro(0) {}
+  
+  /// Invalidate - Permanently mark this file as not being suitable for the
+  /// include-file optimization.
+  void Invalidate() {
+    // If we have read tokens but have no controlling macro, the state-machine
+    // below can never "accept".
+    ReadAnyTokens = true;
+    TheMacro = 0;
+  }
+  
+  /// getHasReadAnyTokensVal - This is used for the #ifndef handshake at the
+  /// top of the file when reading preprocessor directives.  Otherwise, reading
+  /// the "ifndef x" would count as reading tokens.
+  bool getHasReadAnyTokensVal() const { return ReadAnyTokens; }
+  
+  /// ReadToken / ReadDirective - If a token or directive is read, remember
+  /// that we have seen a side-effect in this file.
+  void ReadToken()     { ReadAnyTokens = true; }
+  void ReadDirective() { ReadAnyTokens = true; } 
+  
+  /// EnterTopLevelIFNDEF - When entering a top-level #ifndef directive (or the
+  /// "#if !defined" equivalent) without any preceding tokens, this method is
+  /// called.
+  void EnterTopLevelIFNDEF(const IdentifierTokenInfo *M) {
+    // Note, we don't care about the input value of 'ReadAnyTokens'.  The caller
+    // ensures that this is only called if there are no tokens read before the
+    // #ifndef.
+    
+    // If the macro is already set, this is after the top-level #endif.
+    if (TheMacro)
+      return Invalidate();
+    
+    // Remember that we're in the #if and that we have the macro.
+    ReadAnyTokens = true;
+    TheMacro = M;
+  }
+
+  /// FoundTopLevelElse - This is invoked when an #else/#elif directive is found
+  /// in the top level conditional in the file.
+  void FoundTopLevelElse() {
+    // If a #else directive is found at the top level, there is a chunk of the
+    // file not guarded by the controlling macro.
+    Invalidate();
+  }
+  
+  /// ExitTopLevelConditional - This method is called when the lexer exits the
+  /// top-level conditional.
+  void ExitTopLevelConditional() {
+    // If no controlling macro is recorded (never set, or invalidated), give up
+    // on this file.  Otherwise the top of the file was ok: reset our state to
+    // "not having read any tokens" so we can detect anything after the #endif.
+    if (!TheMacro) return Invalidate();
+    
+    // At this point, we haven't "read any tokens" but we do have a controlling
+    // macro.
+    ReadAnyTokens = false;
+  }
+  
+  /// GetControllingMacroAtEndOfFile - Once the entire file has been lexed, if
+  /// there is a controlling macro, return it.
+  const IdentifierTokenInfo *GetControllingMacroAtEndOfFile() const {
+    // If we haven't read any tokens after the #endif, return the controlling
+    // macro if it's valid (if it isn't, it will be null).
+    if (!ReadAnyTokens)
+      return TheMacro;
+    return 0;
+  }
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@ -482,7 +482,8 @@ private:
  // HandleUnassertDirective(LexerToken &Tok);
  
  // Conditional Inclusion.
-  void HandleIfdefDirective(LexerToken &Tok, bool isIfndef);
+  void HandleIfdefDirective(LexerToken &Tok, bool isIfndef,
+                            bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(LexerToken &Tok);
  void HandleEndifDirective(LexerToken &Tok);
  void HandleElseDirective(LexerToken &Tok);