llvm-project/clang/lib/Lex/PPMacroExpansion.cpp

605 lines
23 KiB
C++
Raw Normal View History

//===--- MacroExpansion.cpp - Top level Macro Expansion -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the top level handling of macro expasion for the
// preprocessor.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
#include "MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/FileManager.h"
#include "clang/Lex/LexDiagnostic.h"
#include <cstdio>
#include <ctime>
using namespace clang;
/// setMacroInfo - Specify a macro for this identifier.
///
void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
2009-04-11 05:17:07 +08:00
if (MI) {
Macros[II] = MI;
II->setHasMacroDefinition(true);
2009-04-11 05:17:07 +08:00
} else if (II->hasMacroDefinition()) {
Macros.erase(II);
II->setHasMacroDefinition(false);
}
}
/// RegisterBuiltinMacro - Register the specified identifier in the identifier
/// table and mark it as a builtin macro to be expanded.
IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) {
// Get the identifier.
IdentifierInfo *Id = getIdentifierInfo(Name);
// Mark it as being a macro that is builtin.
MacroInfo *MI = AllocateMacroInfo(SourceLocation());
MI->setIsBuiltinMacro();
setMacroInfo(Id, MI);
return Id;
}
/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
/// identifier table.
void Preprocessor::RegisterBuiltinMacros() {
Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
Ident__COUNTER__ = RegisterBuiltinMacro("__COUNTER__");
Ident_Pragma = RegisterBuiltinMacro("_Pragma");
// GCC Extensions.
Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
}
/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
/// in its expansion, currently expands to that token literally.
static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
const IdentifierInfo *MacroIdent,
Preprocessor &PP) {
IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
// If the token isn't an identifier, it's always literally expanded.
if (II == 0) return true;
// If the identifier is a macro, and if that macro is enabled, it may be
// expanded so it's not a trivial expansion.
if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
// Fast expanding "#define X X" is ok, because X would be disabled.
II != MacroIdent)
return false;
// If this is an object-like macro invocation, it is safe to trivially expand
// it.
if (MI->isObjectLike()) return true;
// If this is a function-like macro invocation, it's safe to trivially expand
// as long as the identifier is not a macro argument.
for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
I != E; ++I)
if (*I == II)
return false; // Identifier is a macro argument.
return true;
}
/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool Preprocessor::isNextPPTokenLParen() {
// Do some quick tests for rejection cases.
unsigned Val;
if (CurLexer)
Val = CurLexer->isNextPPTokenLParen();
else if (CurPTHLexer)
Val = CurPTHLexer->isNextPPTokenLParen();
else
Val = CurTokenLexer->isNextTokenLParen();
if (Val == 2) {
// We have run off the end. If it's a source file we don't
// examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
// macro stack.
if (CurPPLexer)
return false;
for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
IncludeStackInfo &Entry = IncludeMacroStack[i-1];
if (Entry.TheLexer)
Val = Entry.TheLexer->isNextPPTokenLParen();
else if (Entry.ThePTHLexer)
Val = Entry.ThePTHLexer->isNextPPTokenLParen();
else
Val = Entry.TheTokenLexer->isNextTokenLParen();
if (Val != 2)
break;
// Ran off the end of a source file?
if (Entry.ThePPLexer)
return false;
}
}
// Okay, if we know that the token is a '(', lex it and return. Otherwise we
// have found something that isn't a '(' or we found the end of the
// translation unit. In either case, return false.
return Val == 1;
}
/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
/// expanded as a macro, handle it and return the next token as 'Identifier'.
bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
MacroInfo *MI) {
if (Callbacks) Callbacks->MacroExpands(Identifier, MI);
// If this is a macro exapnsion in the "#if !defined(x)" line for the file,
// then the macro could expand to different things in other contexts, we need
// to disable the optimization in this case.
if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();
// If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
if (MI->isBuiltinMacro()) {
ExpandBuiltinMacro(Identifier);
return false;
}
/// Args - If this is a function-like macro expansion, this contains,
/// for each macro argument, the list of tokens that were provided to the
/// invocation.
MacroArgs *Args = 0;
// Remember where the end of the instantiation occurred. For an object-like
// macro, this is the identifier. For a function-like macro, this is the ')'.
SourceLocation InstantiationEnd = Identifier.getLocation();
// If this is a function-like macro, read the arguments.
if (MI->isFunctionLike()) {
// C99 6.10.3p10: If the preprocessing token immediately after the the macro
// name isn't a '(', this macro should not be expanded.
if (!isNextPPTokenLParen())
return true;
// Remember that we are now parsing the arguments to a macro invocation.
// Preprocessor directives used inside macro arguments are not portable, and
// this enables the warning.
InMacroArgs = true;
Args = ReadFunctionLikeMacroArgs(Identifier, MI, InstantiationEnd);
// Finished parsing args.
InMacroArgs = false;
// If there was an error parsing the arguments, bail out.
if (Args == 0) return false;
++NumFnMacroExpanded;
} else {
++NumMacroExpanded;
}
// Notice that this macro has been used.
MI->setIsUsed(true);
// If we started lexing a macro, enter the macro expansion body.
// If this macro expands to no tokens, don't bother to push it onto the
// expansion stack, only to take it right back off.
if (MI->getNumTokens() == 0) {
// No need for arg info.
if (Args) Args->destroy();
// Ignore this macro use, just return the next token in the current
// buffer.
bool HadLeadingSpace = Identifier.hasLeadingSpace();
bool IsAtStartOfLine = Identifier.isAtStartOfLine();
Lex(Identifier);
// If the identifier isn't on some OTHER line, inherit the leading
// whitespace/first-on-a-line property of this token. This handles
// stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
// empty.
if (!Identifier.isAtStartOfLine()) {
if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
}
++NumFastMacroExpanded;
return false;
} else if (MI->getNumTokens() == 1 &&
isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
*this)) {
// Otherwise, if this macro expands into a single trivially-expanded
// token: expand it now. This handles common cases like
// "#define VAL 42".
// No need for arg info.
if (Args) Args->destroy();
// Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
// identifier to the expanded token.
bool isAtStartOfLine = Identifier.isAtStartOfLine();
bool hasLeadingSpace = Identifier.hasLeadingSpace();
// Remember where the token is instantiated.
SourceLocation InstantiateLoc = Identifier.getLocation();
// Replace the result token.
Identifier = MI->getReplacementToken(0);
// Restore the StartOfLine/LeadingSpace markers.
Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
// Update the tokens location to include both its instantiation and physical
// locations.
SourceLocation Loc =
SourceMgr.createInstantiationLoc(Identifier.getLocation(), InstantiateLoc,
InstantiationEnd,Identifier.getLength());
Identifier.setLocation(Loc);
// If this is #define X X, we must mark the result as unexpandible.
if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
if (getMacroInfo(NewII) == MI)
Identifier.setFlag(Token::DisableExpand);
// Since this is not an identifier token, it can't be macro expanded, so
// we're done.
++NumFastMacroExpanded;
return false;
}
// Start expanding the macro.
EnterMacro(Identifier, InstantiationEnd, Args);
// Now that the macro is at the top of the include stack, ask the
// preprocessor to read the next token from it.
Lex(Identifier);
return false;
}
/// ReadFunctionLikeMacroArgs - After reading "MACRO" and knowing that the next
/// token is the '(' of the macro, this method is invoked to read all of the
/// actual arguments specified for the macro invocation. This returns null on
/// error.
MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
MacroInfo *MI,
SourceLocation &MacroEnd) {
// The number of fixed arguments to parse.
unsigned NumFixedArgsLeft = MI->getNumArgs();
bool isVariadic = MI->isVariadic();
// Outer loop, while there are more arguments, keep reading them.
Token Tok;
// Read arguments as unexpanded tokens. This avoids issues, e.g., where
// an argument value in a macro could expand to ',' or '(' or ')'.
LexUnexpandedToken(Tok);
assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
// ArgTokens - Build up a list of tokens that make up each argument. Each
// argument is separated by an EOF token. Use a SmallVector so we can avoid
// heap allocations in the common case.
llvm::SmallVector<Token, 64> ArgTokens;
unsigned NumActuals = 0;
while (Tok.isNot(tok::r_paren)) {
assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
"only expect argument separators here");
unsigned ArgTokenStart = ArgTokens.size();
SourceLocation ArgStartLoc = Tok.getLocation();
// C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
// that we already consumed the first one.
unsigned NumParens = 0;
while (1) {
// Read arguments as unexpanded tokens. This avoids issues, e.g., where
// an argument value in a macro could expand to ',' or '(' or ')'.
LexUnexpandedToken(Tok);
if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
Diag(MacroName, diag::err_unterm_macro_invoc);
// Do not lose the EOF/EOM. Return it to the client.
MacroName = Tok;
return 0;
} else if (Tok.is(tok::r_paren)) {
// If we found the ) token, the macro arg list is done.
if (NumParens-- == 0) {
MacroEnd = Tok.getLocation();
break;
}
} else if (Tok.is(tok::l_paren)) {
++NumParens;
} else if (Tok.is(tok::comma) && NumParens == 0) {
// Comma ends this argument if there are more fixed arguments expected.
// However, if this is a variadic macro, and this is part of the
// variadic part, then the comma is just an argument token.
if (!isVariadic) break;
if (NumFixedArgsLeft > 1)
break;
} else if (Tok.is(tok::comment) && !KeepMacroComments) {
// If this is a comment token in the argument list and we're just in
// -C mode (not -CC mode), discard the comment.
continue;
} else if (Tok.getIdentifierInfo() != 0) {
// Reading macro arguments can cause macros that we are currently
// expanding from to be popped off the expansion stack. Doing so causes
// them to be reenabled for expansion. Here we record whether any
// identifiers we lex as macro arguments correspond to disabled macros.
// If so, we mark the token as noexpand. This is a subtle aspect of
// C99 6.10.3.4p2.
if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
if (!MI->isEnabled())
Tok.setFlag(Token::DisableExpand);
}
ArgTokens.push_back(Tok);
}
// If this was an empty argument list foo(), don't add this as an empty
// argument.
if (ArgTokens.empty() && Tok.getKind() == tok::r_paren)
break;
// If this is not a variadic macro, and too many args were specified, emit
// an error.
if (!isVariadic && NumFixedArgsLeft == 0) {
if (ArgTokens.size() != ArgTokenStart)
ArgStartLoc = ArgTokens[ArgTokenStart].getLocation();
// Emit the diagnostic at the macro name in case there is a missing ).
// Emitting it at the , could be far away from the macro name.
Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
return 0;
}
// Empty arguments are standard in C99 and supported as an extension in
// other modes.
if (ArgTokens.size() == ArgTokenStart && !Features.C99)
Diag(Tok, diag::ext_empty_fnmacro_arg);
// Add a marker EOF token to the end of the token list for this argument.
Token EOFTok;
EOFTok.startToken();
EOFTok.setKind(tok::eof);
EOFTok.setLocation(Tok.getLocation());
EOFTok.setLength(0);
ArgTokens.push_back(EOFTok);
++NumActuals;
assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
--NumFixedArgsLeft;
}
// Okay, we either found the r_paren. Check to see if we parsed too few
// arguments.
unsigned MinArgsExpected = MI->getNumArgs();
// See MacroArgs instance var for description of this.
bool isVarargsElided = false;
if (NumActuals < MinArgsExpected) {
// There are several cases where too few arguments is ok, handle them now.
if (NumActuals == 0 && MinArgsExpected == 1) {
// #define A(X) or #define A(...) ---> A()
// If there is exactly one argument, and that argument is missing,
// then we have an empty "()" argument empty list. This is fine, even if
// the macro expects one argument (the argument is just empty).
isVarargsElided = MI->isVariadic();
} else if (MI->isVariadic() &&
(NumActuals+1 == MinArgsExpected || // A(x, ...) -> A(X)
(NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A()
// Varargs where the named vararg parameter is missing: ok as extension.
// #define A(x, ...)
// A("blah")
Diag(Tok, diag::ext_missing_varargs_arg);
// Remember this occurred, allowing us to elide the comma when used for
// cases like:
// #define A(x, foo...) blah(a, ## foo)
// #define B(x, ...) blah(a, ## __VA_ARGS__)
// #define C(...) blah(a, ## __VA_ARGS__)
// A(x) B(x) C()
isVarargsElided = true;
} else {
// Otherwise, emit the error.
Diag(Tok, diag::err_too_few_args_in_macro_invoc);
return 0;
}
// Add a marker EOF token to the end of the token list for this argument.
SourceLocation EndLoc = Tok.getLocation();
Tok.startToken();
Tok.setKind(tok::eof);
Tok.setLocation(EndLoc);
Tok.setLength(0);
ArgTokens.push_back(Tok);
// If we expect two arguments, add both as empty.
if (NumActuals == 0 && MinArgsExpected == 2)
ArgTokens.push_back(Tok);
} else if (NumActuals > MinArgsExpected && !MI->isVariadic()) {
// Emit the diagnostic at the macro name in case there is a missing ).
// Emitting it at the , could be far away from the macro name.
Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
return 0;
}
return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided);
}
/// ComputeDATE_TIME - Compute the current time, enter it into the specified
/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
/// the identifier tokens inserted.
static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
Preprocessor &PP) {
time_t TT = time(0);
struct tm *TM = localtime(&TT);
static const char * const Months[] = {
"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
};
char TmpBuffer[100];
sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
TM->tm_year+1900);
Token TmpTok;
TmpTok.startToken();
PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
DATELoc = TmpTok.getLocation();
sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
TIMELoc = TmpTok.getLocation();
}
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
// Figure out which token this is.
IdentifierInfo *II = Tok.getIdentifierInfo();
assert(II && "Can't be a macro without id info!");
// If this is an _Pragma directive, expand it, invoke the pragma handler, then
// lex the token after it.
if (II == Ident_Pragma)
return Handle_Pragma(Tok);
++NumBuiltinMacroExpanded;
char TmpBuffer[100];
// Set up the return result.
Tok.setIdentifierInfo(0);
Tok.clearFlag(Token::NeedsCleaning);
if (II == Ident__LINE__) {
// C99 6.10.8: "__LINE__: The presumed line number (within the current
// source file) of the current source line (an integer constant)". This can
// be affected by #line.
SourceLocation Loc = Tok.getLocation();
// Advance to the location of the first _, this might not be the first byte
// of the token if it starts with an escaped newline.
Loc = AdvanceToTokenCharacter(Loc, 0);
// One wrinkle here is that GCC expands __LINE__ to location of the *end* of
// a macro instantiation. This doesn't matter for object-like macros, but
// can matter for a function-like macro that expands to contain __LINE__.
// Skip down through instantiation points until we find a file loc for the
// end of the instantiation history.
Loc = SourceMgr.getInstantiationRange(Loc).second;
PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
// __LINE__ expands to a simple numeric value.
sprintf(TmpBuffer, "%u", PLoc.getLine());
Tok.setKind(tok::numeric_constant);
CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation());
} else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
// C99 6.10.8: "__FILE__: The presumed name of the current source file (a
// character string literal)". This can be affected by #line.
PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
// __BASE_FILE__ is a GNU extension that returns the top of the presumed
// #include stack instead of the current file.
if (II == Ident__BASE_FILE__) {
Diag(Tok, diag::ext_pp_base_file);
SourceLocation NextLoc = PLoc.getIncludeLoc();
while (NextLoc.isValid()) {
PLoc = SourceMgr.getPresumedLoc(NextLoc);
NextLoc = PLoc.getIncludeLoc();
}
}
// Escape this filename. Turn '\' -> '\\' '"' -> '\"'
std::string FN = PLoc.getFilename();
FN = '"' + Lexer::Stringify(FN) + '"';
Tok.setKind(tok::string_literal);
CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
} else if (II == Ident__DATE__) {
if (!DATELoc.isValid())
ComputeDATE_TIME(DATELoc, TIMELoc, *this);
Tok.setKind(tok::string_literal);
Tok.setLength(strlen("\"Mmm dd yyyy\""));
Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(),
Tok.getLocation(),
Tok.getLength()));
} else if (II == Ident__TIME__) {
if (!TIMELoc.isValid())
ComputeDATE_TIME(DATELoc, TIMELoc, *this);
Tok.setKind(tok::string_literal);
Tok.setLength(strlen("\"hh:mm:ss\""));
Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(),
Tok.getLocation(),
Tok.getLength()));
} else if (II == Ident__INCLUDE_LEVEL__) {
Diag(Tok, diag::ext_pp_include_level);
// Compute the presumed include depth of this token. This can be affected
// by GNU line markers.
unsigned Depth = 0;
PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
for (; PLoc.isValid(); ++Depth)
PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
// __INCLUDE_LEVEL__ expands to a simple numeric value.
sprintf(TmpBuffer, "%u", Depth);
Tok.setKind(tok::numeric_constant);
CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation());
} else if (II == Ident__TIMESTAMP__) {
// MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
// of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
Diag(Tok, diag::ext_pp_timestamp);
// Get the file that we are lexing out of. If we're currently lexing from
// a macro, dig into the include stack.
const FileEntry *CurFile = 0;
PreprocessorLexer *TheLexer = getCurrentFileLexer();
if (TheLexer)
CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());
// If this file is older than the file it depends on, emit a diagnostic.
const char *Result;
if (CurFile) {
time_t TT = CurFile->getModificationTime();
struct tm *TM = localtime(&TT);
Result = asctime(TM);
} else {
Result = "??? ??? ?? ??:??:?? ????\n";
}
TmpBuffer[0] = '"';
strcpy(TmpBuffer+1, Result);
unsigned Len = strlen(TmpBuffer);
TmpBuffer[Len] = '"'; // Replace the newline with a quote.
Tok.setKind(tok::string_literal);
CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
} else if (II == Ident__COUNTER__) {
Diag(Tok, diag::ext_pp_counter);
// __COUNTER__ expands to a simple numeric value.
sprintf(TmpBuffer, "%u", CounterValue++);
Tok.setKind(tok::numeric_constant);
CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation());
} else {
assert(0 && "Unknown identifier!");
}
}