From 8b3f6235826cb989852b44e30a26dc5a20e59128 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Wed, 29 Aug 2012 00:20:03 +0000 Subject: [PATCH] Keep history of macro definitions and #undefs Summary: Summary: Keep history of macro definitions and #undefs with corresponding source locations, so that we can later find out all macros active in a specified source location. We don't save the history in PCH (no need currently). Memory overhead is about sizeof(void*)*3*+ I've run a test on a file composed of 109 .h files from boost 1.49 on x86-64 linux. Stats before this patch: *** Preprocessor Stats: 73222 directives found: 19171 #define. 4345 #undef. #include/#include_next/#import: 5233 source files entered. 27 max include stack depth 19210 #if/#ifndef/#ifdef. 2384 #else/#elif. 6891 #endif. 408 #pragma. 14466 #if/#ifndef#ifdef regions skipped 80023/451669/1270 obj/fn/builtin macros expanded, 85724 on the fast path. 127145 token paste (##) operations performed, 11008 on the fast path. Preprocessor Memory: 5874615B total BumpPtr: 4399104 Macro Expanded Tokens: 417768 Predefines Buffer: 8135 Macros: 1048576 #pragma push_macro Info: 0 Poison Reasons: 1024 Comment Handlers: 8 Stats with this patch: ... Preprocessor Memory: 7541687B total BumpPtr: 6066176 Macro Expanded Tokens: 417768 Predefines Buffer: 8135 Macros: 1048576 #pragma push_macro Info: 0 Poison Reasons: 1024 Comment Handlers: 8 In my test increase in memory usage is about 1.7Mb, which is ~28% of initial preprocessor's memory usage and about 0.8% of clang's total VMM allocation. As for CPU overhead, it should only be noticeable when iterating over all macros, and should mostly consist of couple extra dereferences and one comparison per macro + skipping of #undef'd macros. It's less trivial to measure, though, as the preprocessor consumes a very small fraction of compilation time. Reviewers: doug.gregor, klimek, rsmith, djasper Reviewed By: doug.gregor CC: cfe-commits, chandlerc Differential Revision: http://llvm-reviews.chandlerc.com/D28 llvm-svn: 162810 --- clang/include/clang/Lex/MacroInfo.h | 29 +++++++- clang/include/clang/Lex/Preprocessor.h | 11 ++- .../lib/Frontend/PrintPreprocessedOutput.cpp | 8 +- clang/lib/Lex/MacroInfo.cpp | 74 ++++++++++--------- clang/lib/Lex/PPDirectives.cpp | 11 +-- clang/lib/Lex/PPMacroExpansion.cpp | 23 +++--- clang/lib/Lex/Pragma.cpp | 6 +- clang/lib/Sema/SemaCodeComplete.cpp | 6 +- clang/lib/Serialization/ASTWriter.cpp | 10 ++- 9 files changed, 107 insertions(+), 71 deletions(-) diff --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h index cbd201f849de..ba3c30d16ad1 100644 --- a/clang/include/clang/Lex/MacroInfo.h +++ b/clang/include/clang/Lex/MacroInfo.h @@ -32,6 +32,12 @@ class MacroInfo { SourceLocation Location; /// EndLocation - The location of the last token in the macro. SourceLocation EndLocation; + /// \brief The location where the macro was #undef'd, or an invalid location + /// for macros that haven't been undefined. + SourceLocation UndefLocation; + /// \brief Previous definition, the identifier of this macro was defined to, + /// or NULL. + MacroInfo *PreviousDefinition; /// Arguments - The list of arguments for a function-like macro. This can be /// empty, for, e.g. "#define X()". In a C99-style variadic macro, this @@ -128,10 +134,31 @@ public: /// setDefinitionEndLoc - Set the location of the last token in the macro. /// void setDefinitionEndLoc(SourceLocation EndLoc) { EndLocation = EndLoc; } + /// getDefinitionEndLoc - Return the location of the last token in the macro. /// SourceLocation getDefinitionEndLoc() const { return EndLocation; } - + + /// \brief Set the location where macro was undefined. Can only be set once. + void setUndefLoc(SourceLocation UndefLoc) { + assert(UndefLocation.isInvalid() && "UndefLocation is already set!"); + assert(UndefLoc.isValid() && "Invalid UndefLoc!"); + UndefLocation = UndefLoc; + } + + /// \brief Get the location where macro was undefined. + SourceLocation getUndefLoc() const { return UndefLocation; } + + /// \brief Set previous definition of the macro with the same name. Can only + /// be set once. + void setPreviousDefinition(MacroInfo *PreviousDef) { + assert(!PreviousDefinition && "PreviousDefiniton is already set!"); + PreviousDefinition = PreviousDef; + } + + /// \brief Get previous definition of the macro with the same name. + MacroInfo *getPreviousDefinition() { return PreviousDefinition; } + /// \brief Get length in characters of the macro definition. unsigned getDefinitionLength(SourceManager &SM) const { if (IsDefinitionLengthCached) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 2d2a31f61155..fca5796068b8 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -274,8 +274,9 @@ class Preprocessor : public RefCountedBase { }; SmallVector DelayedMacroExpandsCallbacks; - /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping - /// to the actual definition of the macro. + /// Macros - For each IdentifierInfo that was associated with a macro, we + /// keep a mapping to the history of all macro definitions and #undefs in + /// the reverse order (the latest one is in the head of the list). llvm::DenseMap Macros; /// \brief Macros that we want to warn because they are not used at the end @@ -470,8 +471,10 @@ public: void setMacroInfo(IdentifierInfo *II, MacroInfo *MI, bool LoadedFromAST = false); - /// macro_iterator/macro_begin/macro_end - This allows you to walk the current - /// state of the macro table. This visits every currently-defined macro. + /// macro_iterator/macro_begin/macro_end - This allows you to walk the macro + /// history table. Currently defined macros have + /// IdentifierInfo::hasMacroDefinition() set and an empty + /// MacroInfo::getUndefLoc() at the head of the list. typedef llvm::DenseMap::const_iterator macro_iterator; macro_iterator macro_begin(bool IncludeExternalMacros = true) const; diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 5311ed52cd29..30707dc0c008 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -570,8 +570,12 @@ static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) { do PP.Lex(Tok); while (Tok.isNot(tok::eof)); - SmallVector - MacrosByID(PP.macro_begin(), PP.macro_end()); + SmallVector MacrosByID; + for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end(); + I != E; ++I) { + if (I->first->hasMacroDefinition()) + MacrosByID.push_back(id_macro_pair(I->first, I->second)); + } llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare); for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) { diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp index 3d0c9a1c2b51..7a4964f7d26c 100644 --- a/clang/lib/Lex/MacroInfo.cpp +++ b/clang/lib/Lex/MacroInfo.cpp @@ -15,44 +15,46 @@ #include "clang/Lex/Preprocessor.h" using namespace clang; -MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { - IsFunctionLike = false; - IsC99Varargs = false; - IsGNUVarargs = false; - IsBuiltinMacro = false; - IsFromAST = false; - ChangedAfterLoad = false; - IsDisabled = false; - IsUsed = false; - IsAllowRedefinitionsWithoutWarning = false; - IsWarnIfUnused = false; - IsDefinitionLengthCached = false; - IsPublic = true; - - ArgumentList = 0; - NumArguments = 0; +MacroInfo::MacroInfo(SourceLocation DefLoc) + : Location(DefLoc), + PreviousDefinition(0), + ArgumentList(0), + NumArguments(0), + IsDefinitionLengthCached(false), + IsFunctionLike(false), + IsC99Varargs(false), + IsGNUVarargs(false), + IsBuiltinMacro(false), + IsFromAST(false), + ChangedAfterLoad(false), + IsDisabled(false), + IsUsed(false), + IsAllowRedefinitionsWithoutWarning(false), + IsWarnIfUnused(false), + IsPublic(true) { } -MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) { - Location = MI.Location; - EndLocation = MI.EndLocation; - ReplacementTokens = MI.ReplacementTokens; - IsFunctionLike = MI.IsFunctionLike; - IsC99Varargs = MI.IsC99Varargs; - IsGNUVarargs = MI.IsGNUVarargs; - IsBuiltinMacro = MI.IsBuiltinMacro; - IsFromAST = MI.IsFromAST; - ChangedAfterLoad = MI.ChangedAfterLoad; - IsDisabled = MI.IsDisabled; - IsUsed = MI.IsUsed; - IsAllowRedefinitionsWithoutWarning = MI.IsAllowRedefinitionsWithoutWarning; - IsWarnIfUnused = MI.IsWarnIfUnused; - IsDefinitionLengthCached = MI.IsDefinitionLengthCached; - DefinitionLength = MI.DefinitionLength; - IsPublic = MI.IsPublic; - - ArgumentList = 0; - NumArguments = 0; +MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) + : Location(MI.Location), + EndLocation(MI.EndLocation), + UndefLocation(MI.UndefLocation), + PreviousDefinition(0), + ArgumentList(0), + NumArguments(0), + ReplacementTokens(MI.ReplacementTokens), + DefinitionLength(MI.DefinitionLength), + IsDefinitionLengthCached(MI.IsDefinitionLengthCached), + IsFunctionLike(MI.IsFunctionLike), + IsC99Varargs(MI.IsC99Varargs), + IsGNUVarargs(MI.IsGNUVarargs), + IsBuiltinMacro(MI.IsBuiltinMacro), + IsFromAST(MI.IsFromAST), + ChangedAfterLoad(MI.ChangedAfterLoad), + IsDisabled(MI.IsDisabled), + IsUsed(MI.IsUsed), + IsAllowRedefinitionsWithoutWarning(MI.IsAllowRedefinitionsWithoutWarning), + IsWarnIfUnused(MI.IsWarnIfUnused), + IsPublic(MI.IsPublic) { setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator); } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 74b9cbc881ac..6de0e4a9f36e 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1849,7 +1849,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { MI->setDefinitionEndLoc(LastTok.getLocation()); // Finally, if this identifier already had a macro defined for it, verify that - // the macro bodies are identical and free the old definition. + // the macro bodies are identical, and issue diagnostics if they are not. if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) { // It is very common for system headers to have tons of macro redefinitions // and for warnings to be disabled in system headers. If this is the case, @@ -1870,7 +1870,6 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { } if (OtherMI->isWarnIfUnused()) WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc()); - ReleaseMacroInfo(OtherMI); } setMacroInfo(MacroNameTok.getIdentifierInfo(), MI); @@ -1921,9 +1920,11 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) { if (MI->isWarnIfUnused()) WarnUnusedMacroLocs.erase(MI->getDefinitionLoc()); - // Free macro definition. - ReleaseMacroInfo(MI); - setMacroInfo(MacroNameTok.getIdentifierInfo(), 0); + MI->setUndefLoc(MacroNameTok.getLocation()); + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); + II->setHasMacroDefinition(false); + if (II->isFromAST()) + II->setChangedSinceDeserialization(); } diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 96d6b2c7297d..936b37009c12 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -33,15 +33,15 @@ using namespace clang; MacroInfo *Preprocessor::getInfoForMacro(IdentifierInfo *II) const { assert(II->hasMacroDefinition() && "Identifier is not a macro!"); - - llvm::DenseMap::const_iterator Pos - = Macros.find(II); + + macro_iterator Pos = Macros.find(II); if (Pos == Macros.end()) { // Load this macro from the external source. getExternalSource()->LoadMacroDefinition(II); Pos = Macros.find(II); } assert(Pos != Macros.end() && "Identifier macro info is missing!"); + assert(Pos->second->getUndefLoc().isInvalid() && "Macro is undefined!"); return Pos->second; } @@ -49,17 +49,12 @@ MacroInfo *Preprocessor::getInfoForMacro(IdentifierInfo *II) const { /// void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI, bool LoadedFromAST) { - if (MI) { - Macros[II] = MI; - II->setHasMacroDefinition(true); - if (II->isFromAST() && !LoadedFromAST) - II->setChangedSinceDeserialization(); - } else if (II->hasMacroDefinition()) { - Macros.erase(II); - II->setHasMacroDefinition(false); - if (II->isFromAST() && !LoadedFromAST) - II->setChangedSinceDeserialization(); - } + assert(MI && "MacroInfo should be non-zero!"); + MI->setPreviousDefinition(Macros[II]); + Macros[II] = MI; + II->setHasMacroDefinition(true); + if (II->isFromAST() && !LoadedFromAST) + II->setChangedSinceDeserialization(); } /// RegisterBuiltinMacro - Register the specified identifier in the identifier diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index b7f7d1d4a90e..acf5c74556f4 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -733,12 +733,10 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { llvm::DenseMap >::iterator iter = PragmaPushMacroInfo.find(IdentInfo); if (iter != PragmaPushMacroInfo.end()) { - // Release the MacroInfo currently associated with IdentInfo. - MacroInfo *CurrentMI = getMacroInfo(IdentInfo); - if (CurrentMI) { + // Forget the MacroInfo currently associated with IdentInfo. + if (MacroInfo *CurrentMI = getMacroInfo(IdentInfo)) { if (CurrentMI->isWarnIfUnused()) WarnUnusedMacroLocs.erase(CurrentMI->getDefinitionLoc()); - ReleaseMacroInfo(CurrentMI); } // Get the MacroInfo we want to reinstall. diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 93413da00d89..05b22fa5371e 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -2904,7 +2904,11 @@ static void AddMacroResults(Preprocessor &PP, ResultBuilder &Results, for (Preprocessor::macro_iterator M = PP.macro_begin(), MEnd = PP.macro_end(); M != MEnd; ++M) { - Results.AddResult(Result(M->first, + // FIXME: Eventually, we'd want to be able to look back to the macro + // definition that was actually active at the point of code completion (even + // if that macro has since been #undef'd). + if (M->first->hasMacroDefinition()) + Results.AddResult(Result(M->first, getMacroUsagePriority(M->first->getName(), PP.getLangOpts(), TargetTypeIsPointer))); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a64e0d357953..4a3987cbdc74 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1677,10 +1677,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { for (Preprocessor::macro_iterator I = PP.macro_begin(Chain == 0), E = PP.macro_end(Chain == 0); I != E; ++I) { - const IdentifierInfo *Name = I->first; - if (!IsModule || I->second->isPublic()) { - MacroDefinitionsSeen.insert(Name); - MacrosToEmit.push_back(std::make_pair(I->first, I->second)); + // FIXME: We'll need to store macro history in PCH. + if (I->first->hasMacroDefinition()) { + if (!IsModule || I->second->isPublic()) { + MacroDefinitionsSeen.insert(I->first); + MacrosToEmit.push_back(std::make_pair(I->first, I->second)); + } } }