Implement the automaton for recognizing files with controlling macros.

llvm-svn: 38646
This commit is contained in:
Chris Lattner 2006-07-04 07:11:10 +00:00
parent d7dfa57efd
commit 371ac8a9b7
6 changed files with 212 additions and 15 deletions

View File

@ -979,6 +979,8 @@ LexNextToken:
goto LexNextToken; // GCC isn't tail call eliminating.
case 'L':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
Char = getCharAndSize(CurPtr, SizeTmp);
// Wide string literal.
@ -1000,20 +1002,28 @@ LexNextToken:
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
case 'v': case 'w': case 'x': case 'y': case 'z':
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexIdentifier(Result, CurPtr);
// C99 6.4.4.1: Integer Constants.
// C99 6.4.4.2: Floating Constants.
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);
// C99 6.4.4: Character Constants.
case '\'':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexCharConstant(Result, CurPtr);
// C99 6.4.5: String Literals.
case '"':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexStringLiteral(Result, CurPtr);
// C99 6.4.6: Punctuators.
@ -1041,6 +1051,9 @@ LexNextToken:
case '.':
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char >= '0' && Char <= '9') {
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
} else if (Features.CPlusPlus && Char == '*') {
Result.SetKind(tok::periodstar);
@ -1333,6 +1346,8 @@ LexNextToken:
break;
} else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
Diag(CurPtr-1, diag::ext_dollar_in_identifier);
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexIdentifier(Result, CurPtr);
}
@ -1341,6 +1356,9 @@ LexNextToken:
goto LexNextToken; // GCC isn't tail call eliminating.
}
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
// Update the location of token as well as BufferPtr.
FormTokenWithChars(Result, CurPtr);
}

View File

@ -74,6 +74,9 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, LexerToken &Tok) {
void Preprocessor::HandlePragmaDirective() {
++NumPragma;
// Inform MIOpt that we found a side-effect of parsing this file.
CurLexer->MIOpt.ReadDirective();
// Invoke the first level of pragma handlers which reads the namespace id.
LexerToken Tok;
PragmaHandlers->HandlePragma(*this, Tok);

View File

@ -739,6 +739,14 @@ void Preprocessor::HandleEndOfFile(LexerToken &Result, bool isEndOfMacro) {
return;
}
// See if this file had a controlling macro.
if (CurLexer) { // Not ending a macro...
if (const IdentifierTokenInfo *ControllingMacro =
CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
;
}
}
// If this is a #include'd file, pop it off the include stack and continue
// lexing the #includer file.
if (!IncludeMacroStack.empty()) {
@ -1049,6 +1057,11 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
++NumDirectives;
// We are about to read a token. For the multiple-include optimization FA to
// work, we have to remember if we had read any tokens *before* this
// pp-directive.
bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal();
// Read the next token, the directive flavor.
LexUnexpandedToken(Result);
@ -1059,6 +1072,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
#if 0
case tok::numeric_constant:
MIOpt.ReadDirective();
// FIXME: implement # 7 line numbers!
break;
#endif
@ -1073,7 +1087,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
switch (Result.getIdentifierInfo()->getNameLength()) {
case 4:
if (Directive[0] == 'l' && !strcmp(Directive, "line"))
; // FIXME: implement #line
CurLexer->MIOpt.ReadDirective(); // FIXME: implement #line
if (Directive[0] == 'e' && !strcmp(Directive, "elif"))
return HandleElifDirective(Result);
if (Directive[0] == 's' && !strcmp(Directive, "sccs"))
@ -1083,7 +1097,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
if (Directive[0] == 'e' && !strcmp(Directive, "endif"))
return HandleEndifDirective(Result);
if (Directive[0] == 'i' && !strcmp(Directive, "ifdef"))
return HandleIfdefDirective(Result, false);
return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
if (Directive[0] == 'u' && !strcmp(Directive, "undef"))
return HandleUndefDirective(Result);
if (Directive[0] == 'e' && !strcmp(Directive, "error"))
@ -1095,7 +1109,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
if (Directive[0] == 'd' && !strcmp(Directive, "define"))
return HandleDefineDirective(Result);
if (Directive[0] == 'i' && !strcmp(Directive, "ifndef"))
return HandleIfdefDirective(Result, true);
return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
if (Directive[0] == 'i' && !strcmp(Directive, "import"))
return HandleImportDirective(Result);
if (Directive[0] == 'p' && !strcmp(Directive, "pragma"))
@ -1128,9 +1142,7 @@ void Preprocessor::HandleDirective(LexerToken &Result) {
Diag(Result, diag::err_pp_invalid_directive);
// Read the rest of the PP line.
do {
Lex(Result);
} while (Result.getKind() != tok::eom);
DiscardUntilEndOfDirective();
// Okay, we're done parsing the directive.
}
@ -1151,8 +1163,13 @@ void Preprocessor::HandleUserDiagnosticDirective(LexerToken &Tok,
/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
///
void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) {
// Inform MIOpt that we found a side-effect of parsing this file.
CurLexer->MIOpt.ReadDirective();
// Yes, this directive is an extension.
Diag(Tok, diag::ext_pp_ident_directive);
// Read the string argument.
LexerToken StrTok;
Lex(StrTok);
@ -1179,6 +1196,10 @@ void Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
const DirectoryLookup *LookupFrom,
bool isImport) {
++NumIncluded;
// Inform MIOpt that we found a side-effect of parsing this file.
CurLexer->MIOpt.ReadDirective();
LexerToken FilenameTok;
std::string Filename = CurLexer->LexIncludeFilename(FilenameTok);
@ -1225,8 +1246,7 @@ void Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
}
// Look up the file, create a File ID for it.
unsigned FileID =
SourceMgr.createFileID(File, FilenameTok.getLocation());
unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation());
if (FileID == 0)
return Diag(FilenameTok, diag::err_pp_file_not_found);
@ -1276,6 +1296,10 @@ void Preprocessor::HandleImportDirective(LexerToken &ImportTok) {
///
void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
++NumDefined;
// Inform MIOpt that we found a side-effect of parsing this file.
CurLexer->MIOpt.ReadDirective();
LexerToken MacroNameTok;
ReadMacroName(MacroNameTok, true);
@ -1345,6 +1369,10 @@ void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
///
void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
++NumUndefined;
// Inform MIOpt that we found a side-effect of parsing this file.
CurLexer->MIOpt.ReadDirective();
LexerToken MacroNameTok;
ReadMacroName(MacroNameTok, true);
@ -1375,12 +1403,15 @@ void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
//===----------------------------------------------------------------------===//
/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is
/// true when this is a #ifndef directive.
/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true
/// if any tokens have been returned or pp-directives activated before this
/// #ifndef has been lexed.
///
void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef,
bool ReadAnyTokensBeforeDirective) {
++NumIf;
LexerToken DirectiveTok = Result;
LexerToken MacroNameTok;
ReadMacroName(MacroNameTok);
@ -1389,7 +1420,14 @@ void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
return;
// Check to see if this is the last token on the #if[n]def line.
CheckEndOfDirective("#ifdef");
CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef");
// If this is the start of a top-level #ifndef, inform MIOpt.
if (!ReadAnyTokensBeforeDirective &&
CurLexer->getConditionalStackDepth() == 0) {
assert(isIfndef && "#ifdef shouldn't reach here");
CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
}
MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo();
@ -1413,6 +1451,11 @@ void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
///
void Preprocessor::HandleIfDirective(LexerToken &IfToken) {
++NumIf;
// FIXME: Detect "#if !defined(X)" for the MIOpt.
CurLexer->MIOpt.ReadDirective();
// Parse and evaluate the conditional expression.
bool ConditionalTrue = EvaluateDirectiveExpression();
// Should we include the stuff contained by this directive?
@ -1431,6 +1474,7 @@ void Preprocessor::HandleIfDirective(LexerToken &IfToken) {
///
void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
++NumEndif;
// Check that this is the whole directive.
CheckEndOfDirective("#endif");
@ -1440,6 +1484,10 @@ void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
return Diag(EndifToken, diag::err_pp_endif_without_if);
}
// If this #endif closes a top-level conditional, inform MIOpt.
if (CurLexer->getConditionalStackDepth() == 0)
CurLexer->MIOpt.ExitTopLevelConditional();
assert(!CondInfo.WasSkipping && !isSkipping() &&
"This code should only be reachable in the non-skipping case!");
}
@ -1447,12 +1495,17 @@ void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
void Preprocessor::HandleElseDirective(LexerToken &Result) {
++NumElse;
// #else directive in a non-skipping conditional... start skipping.
CheckEndOfDirective("#else");
PPConditionalInfo CI;
if (CurLexer->popConditionalLevel(CI))
return Diag(Result, diag::pp_err_else_without_if);
// If this is a top-level #else, inform the MIOpt.
if (CurLexer->getConditionalStackDepth() == 0)
CurLexer->MIOpt.FoundTopLevelElse();
// If this is a #else with a #else before it, report the error.
if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
@ -1465,6 +1518,7 @@ void Preprocessor::HandleElseDirective(LexerToken &Result) {
void Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
++NumElse;
// #elif directive in a non-skipping conditional... start skipping.
// We don't care what the condition is, because we will always skip it (since
// the block immediately before it was included).
@ -1474,6 +1528,10 @@ void Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
if (CurLexer->popConditionalLevel(CI))
return Diag(ElifToken, diag::pp_err_elif_without_if);
// If this is a top-level #elif, inform the MIOpt.
if (CurLexer->getConditionalStackDepth() == 0)
CurLexer->MIOpt.FoundTopLevelElse();
// If this is a #elif with a #else before it, report the error.
if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);

View File

@ -15,6 +15,7 @@
#define LLVM_CLANG_LEXER_H
#include "clang/Lex/LexerToken.h"
#include "clang/Lex/MultipleIncludeOpt.h"
#include <string>
#include <vector>
@ -66,7 +67,11 @@ class Lexer {
bool ParsingFilename; // True after #include: turn <xx> into string.
// Context that changes as the file is lexed.
/// MIOpt - This is a state machine that detects the #ifndef-wrapping a file
/// idiom for the multiple-include optimization.
MultipleIncludeOpt MIOpt;
/// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks
/// we are currently in.
std::vector<PPConditionalInfo> ConditionalStack;
@ -114,7 +119,8 @@ public:
IsAtStartOfLine = false;
}
// Get a token.
// Get a token. Note that this may delete the current lexer if the end of
// file is reached.
LexTokenInternal(Result);
}

View File

@ -0,0 +1,111 @@
//===--- MultipleIncludeOpt.h - Header Multiple-Include Optzn ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the MultipleIncludeOpt interface.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_MULTIPLEINCLUDEOPT_H
#define LLVM_CLANG_MULTIPLEINCLUDEOPT_H
namespace llvm {
namespace clang {
class IdentifierTokenInfo;

/// MultipleIncludeOpt - This class implements the simple state machine that the
/// Lexer class uses to detect files subject to the 'multiple-include'
/// optimization.  The public methods in this class are triggered by various
/// events that occur when a file is lexed, and after the entire file is lexed,
/// information about which macro (if any) controls the header is returned.
///
/// The machine accepts a file only when everything in it sits inside a single
/// top-level "#ifndef MACRO ... #endif" with no #else/#elif at that level:
/// nothing may precede the #ifndef and nothing may follow its #endif.
class MultipleIncludeOpt {
  /// ReadAnyTokens - This is set to false when a file is first opened and true
  /// any time a token is returned to the client or a (non-multiple-include)
  /// directive is parsed.  When the #endif of the controlling #ifndef is parsed
  /// this is reset back to false, that way any tokens before the first #ifndef
  /// or after the last #endif can be easily detected.
  bool ReadAnyTokens;

  /// InControllingIfndef - True from the moment EnterTopLevelIFNDEF accepts a
  /// candidate controlling macro until the matching top-level conditional is
  /// exited.  Without it, ExitTopLevelConditional could not tell the #endif of
  /// the controlling #ifndef apart from the #endif of some later top-level
  /// #if/#ifdef, and would wrongly forget tokens read after the guard closed.
  bool InControllingIfndef;

  /// TheMacro - The controlling macro for a file, if valid.
  ///
  const IdentifierTokenInfo *TheMacro;

public:
  MultipleIncludeOpt()
    : ReadAnyTokens(false), InControllingIfndef(false), TheMacro(0) {}

  /// Invalidate - Permanently mark this file as not being suitable for the
  /// include-file optimization.
  void Invalidate() {
    // If we have read tokens but have no controlling macro, the state-machine
    // below can never "accept": ReadAnyTokens is only cleared again when a
    // controlling macro is present.
    ReadAnyTokens = true;
    InControllingIfndef = false;
    TheMacro = 0;
  }

  /// getHasReadAnyTokensVal - This is used for the #ifndef handshake at the
  /// top of the file when reading preprocessor directives.  Otherwise, reading
  /// the "ifndef x" would count as reading tokens.
  bool getHasReadAnyTokensVal() const { return ReadAnyTokens; }

  // If a token or directive is read, remember that we have seen a side-effect
  // in this file.
  void ReadToken() { ReadAnyTokens = true; }
  void ReadDirective() { ReadAnyTokens = true; }

  /// EnterTopLevelIFNDEF - When entering a top-level #ifndef directive (or the
  /// "#if !defined" equivalent) without any preceding tokens, this method is
  /// called.  M is the identifier tested by the directive.
  void EnterTopLevelIFNDEF(const IdentifierTokenInfo *M) {
    // Note, we don't care about the input value of 'ReadAnyTokens'.  The caller
    // ensures that this is only called if there are no tokens read before the
    // #ifndef.

    // If the macro is already set, this is after the top-level #endif, so the
    // file contains a second top-level conditional and cannot be accepted.
    if (TheMacro)
      return Invalidate();

    // Remember that we're in the #ifndef and that we have the macro.
    ReadAnyTokens = true;
    InControllingIfndef = true;
    TheMacro = M;
  }

  /// FoundTopLevelElse - This is invoked when an #else/#elif directive is found
  /// in the top level conditional in the file.
  void FoundTopLevelElse() {
    // If a #else directive is found at the top level, there is a chunk of the
    // file not guarded by the controlling macro.
    Invalidate();
  }

  /// ExitTopLevelConditional - This method is called when the lexer exits the
  /// top-level conditional.
  void ExitTopLevelConditional() {
    // Only the #endif that closes the controlling #ifndef may put us back in
    // the "haven't read any tokens" state.  If there is no macro, or if the
    // conditional being closed is some other top-level #if/#ifdef (which means
    // there is content outside the guard), the file can never be accepted.
    if (!TheMacro || !InControllingIfndef)
      return Invalidate();

    // At this point, we haven't "read any tokens" but we do have a controlling
    // macro.  Anything read from here on will set ReadAnyTokens again.
    InControllingIfndef = false;
    ReadAnyTokens = false;
  }

  /// GetControllingMacroAtEndOfFile - Once the entire file has been lexed, if
  /// there is a controlling macro, return it.  Returns null otherwise.
  const IdentifierTokenInfo *GetControllingMacroAtEndOfFile() const {
    // If we haven't read any tokens after the #endif, return the controlling
    // macro if it's valid (if it isn't, it will be null).
    if (!ReadAnyTokens)
      return TheMacro;
    return 0;
  }
};

}  // end namespace clang
}  // end namespace llvm
#endif

View File

@ -482,7 +482,8 @@ private:
// HandleUnassertDirective(LexerToken &Tok);
// Conditional Inclusion.
void HandleIfdefDirective(LexerToken &Tok, bool isIfndef);
void HandleIfdefDirective(LexerToken &Tok, bool isIfndef,
bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(LexerToken &Tok);
void HandleEndifDirective(LexerToken &Tok);
void HandleElseDirective(LexerToken &Tok);