From 603cd869f7cdb0da7a545e86a1786f3175f72475 Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Fri, 22 Mar 2013 18:50:14 +0000 Subject: [PATCH] Simplify ModuleManager/GlobalModuleIndex interaction to eliminate a pile of extraneous stats(). The refactoring in r177367 introduced a serious performance bug where the "lazy" resolution of module file names in the global module index to actual module file entries in the module manager would perform repeated negative stats(). The new interaction requires the module manager to inform the global module index when a module file has been loaded, eliminating the extraneous stat()s and a bunch of bookkeeping on both sides. llvm-svn: 177750 --- .../clang/Serialization/GlobalModuleIndex.h | 65 +++---------- .../clang/Serialization/ModuleManager.h | 18 ++-- clang/lib/Serialization/ASTReader.cpp | 7 +- clang/lib/Serialization/GlobalModuleIndex.cpp | 96 ++++++------------- clang/lib/Serialization/ModuleManager.cpp | 65 ++++--------- 5 files changed, 76 insertions(+), 175 deletions(-) diff --git a/clang/include/clang/Serialization/GlobalModuleIndex.h b/clang/include/clang/Serialization/GlobalModuleIndex.h index c7e9ab4535d2..eaf26d1df16f 100644 --- a/clang/include/clang/Serialization/GlobalModuleIndex.h +++ b/clang/include/clang/Serialization/GlobalModuleIndex.h @@ -20,6 +20,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include @@ -43,36 +44,6 @@ using llvm::SmallVectorImpl; using llvm::StringRef; using serialization::ModuleFile; -/// \brief Abstract class that resolves a module file name to a ModuleFile -/// pointer, which is used to uniquely describe a module file. -class ModuleFileNameResolver { -public: - virtual ~ModuleFileNameResolver(); - - /// \brief Attempt to resolve the given module file name to a specific, - /// already-loaded module. - /// - /// \param FileName The name of the module file. - /// - /// \param ExpectedSize The size that the module file is expected to have. - /// If the actual size differs, the resolver should return \c true. - /// - /// \param ExpectedModTime The modification time that the module file is - /// expected to have. If the actual modification time differs, the resolver - /// should return \c true. - /// - /// \param File Will be set to the module file if there is one, or null - /// otherwise. - /// - /// \returns True if a module file exists but does not meet the size/ - /// modification time criteria, false if the module file is available or has - /// not yet been loaded. - virtual bool resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File) = 0; -}; - /// \brief A global index for a set of module files, providing information about /// the identifiers within those module files. /// @@ -89,9 +60,6 @@ class GlobalModuleIndex { /// as the global module index is live. llvm::OwningPtr Buffer; - /// \brief The module file name resolver. - ModuleFileNameResolver *Resolver; - /// \brief The hash table. /// /// This pointer actually points to a IdentifierIndexTable object, @@ -103,7 +71,7 @@ class GlobalModuleIndex { struct ModuleInfo { ModuleInfo() : File(), Size(), ModTime() { } - /// \brief The module file, if it is known. + /// \brief The module file, once it has been resolved. ModuleFile *File; /// \brief The module file name. @@ -119,9 +87,6 @@ class GlobalModuleIndex { /// \brief The module IDs on which this module directly depends. /// FIXME: We don't really need a vector here. llvm::SmallVector Dependencies; - - /// \brief The module IDs that directly depend on this module. - llvm::SmallVector ImportedBy; }; /// \brief A mapping from module IDs to information about each module. @@ -135,13 +100,19 @@ class GlobalModuleIndex { /// corresponding index into the \c Modules vector. llvm::DenseMap ModulesByFile; + /// \brief The set of modules that have not yet been resolved. + /// + /// The string is just the name of the module itself, which maps to the + /// module ID. + llvm::StringMap UnresolvedModules; + /// \brief The number of identifier lookups we performed. unsigned NumIdentifierLookups; /// \brief The number of identifier lookup hits, where we recognize the /// identifier. unsigned NumIdentifierLookupHits; - + /// \brief Internal constructor. Use \c readIndex() to read an index. explicit GlobalModuleIndex(llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor); @@ -200,19 +171,11 @@ public: /// \returns true if the identifier is known to the index, false otherwise. bool lookupIdentifier(StringRef Name, HitSet &Hits); - /// \brief Set the module file name resolver. - void setResolver(ModuleFileNameResolver *Resolver) { - this->Resolver = Resolver; - } - - /// \brief Note that additional modules have been loaded, which invalidates - /// the module file -> module cache. - void noteAdditionalModulesLoaded() { - ModulesByFile.clear(); - } - - /// \brief Resolve the module file for the module with the given ID. - ModuleFile *resolveModuleFile(unsigned ID); + /// \brief Note that the given module file has been loaded. + /// + /// \returns false if the global module index has information about this + /// module file, and true otherwise. + bool loadedModuleFile(ModuleFile *File); /// \brief Print statistics to standard error. void printStats(); diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 9b58b75ebbc0..c6e3265b3b32 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -16,18 +16,18 @@ #define LLVM_CLANG_SERIALIZATION_MODULE_MANAGER_H #include "clang/Basic/FileManager.h" -#include "clang/Serialization/GlobalModuleIndex.h" #include "clang/Serialization/Module.h" #include "llvm/ADT/DenseMap.h" namespace clang { +class GlobalModuleIndex; class ModuleMap; namespace serialization { - + /// \brief Manages the set of modules loaded by an AST reader. -class ModuleManager : public ModuleFileNameResolver { +class ModuleManager { /// \brief The chain of AST files. The first entry is the one named by the /// user, the last one is the one that doesn't depend on anything further. SmallVector Chain; @@ -61,9 +61,6 @@ class ModuleManager : public ModuleFileNameResolver { /// just an non-owning pointer. GlobalModuleIndex *GlobalIndex; - /// \brief Update the set of modules files we know about known to the global index. - void updateModulesInCommonWithGlobalIndex(); - /// \brief State used by the "visit" operation to avoid malloc traffic in /// calls to visit(). struct VisitState { @@ -202,6 +199,10 @@ public: /// \brief Set the global module index. void setGlobalIndex(GlobalModuleIndex *Index); + /// \brief Notification from the AST reader that the given module file + /// has been "accepted", and will not (can not) be unloaded. + void moduleFileAccepted(ModuleFile *MF); + /// \brief Visit each of the modules. /// /// This routine visits each of the modules, starting with the @@ -270,11 +271,6 @@ public: time_t ExpectedModTime, const FileEntry *&File); - virtual bool resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File); - /// \brief View the graphviz representation of the module graph. void viewGraph(); }; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 29538a13ef1a..0f674530bc67 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2872,11 +2872,16 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName, } } - // Setup the import locations. + // Setup the import locations and notify the module manager that we've + // committed to these module files. for (SmallVectorImpl::iterator M = Loaded.begin(), MEnd = Loaded.end(); M != MEnd; ++M) { ModuleFile &F = *M->Mod; + + ModuleMgr.moduleFileAccepted(&F); + + // Set the import location. F.DirectImportLoc = ImportLoc; if (!M->ImportedBy) F.ImportLoc = M->ImportLoc; diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index 8b5851dbe5d1..f9acb847284d 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/OnDiskHashTable.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/GlobalModuleIndex.h" +#include "clang/Serialization/Module.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" @@ -57,8 +58,6 @@ static const char * const IndexFileName = "modules.idx"; /// \brief The global index file version. static const unsigned CurrentVersion = 1; -ModuleFileNameResolver::~ModuleFileNameResolver() { } - //----------------------------------------------------------------------------// // Global module index reader. //----------------------------------------------------------------------------// @@ -121,7 +120,7 @@ typedef OnDiskChainedHashTable IdentifierIndexTable; GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, llvm::BitstreamCursor Cursor) - : Buffer(Buffer), Resolver(), IdentifierIndex(), + : Buffer(Buffer), IdentifierIndex(), NumIdentifierLookups(), NumIdentifierLookupHits() { // Read the global index. @@ -201,6 +200,9 @@ GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, // Make sure we're at the end of the record. assert(Idx == Record.size() && "More module info?"); + + // Record this module as an unresolved module. + UnresolvedModules[llvm::sys::path::stem(Modules[ID].FileName)] = ID; break; } @@ -215,14 +217,6 @@ GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer, break; } } - - // Compute imported-by relation. - for (unsigned ID = 0, IDEnd = Modules.size(); ID != IDEnd; ++ID) { - for (unsigned D = 0, DEnd = Modules[ID].Dependencies.size(); - D != DEnd; ++D) { - Modules[Modules[ID].Dependencies[D]].ImportedBy.push_back(ID); - } - } } GlobalModuleIndex::~GlobalModuleIndex() { } @@ -260,21 +254,14 @@ void GlobalModuleIndex::getKnownModules(SmallVectorImpl &ModuleFiles) { ModuleFiles.clear(); for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - if (ModuleFile *File = resolveModuleFile(I)) - ModuleFiles.push_back(File); + if (ModuleFile *MF = Modules[I].File) + ModuleFiles.push_back(MF); } } void GlobalModuleIndex::getModuleDependencies( ModuleFile *File, SmallVectorImpl &Dependencies) { - // If the file -> index mapping is empty, populate it now. - if (ModulesByFile.empty()) { - for (unsigned I = 0, N = Modules.size(); I != N; ++I) { - resolveModuleFile(I); - } - } - // Look for information about this module file. llvm::DenseMap::iterator Known = ModulesByFile.find(File); @@ -285,7 +272,7 @@ void GlobalModuleIndex::getModuleDependencies( Dependencies.clear(); ArrayRef StoredDependencies = Modules[Known->second].Dependencies; for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) { - if (ModuleFile *MF = resolveModuleFile(I)) + if (ModuleFile *MF = Modules[I].File) Dependencies.push_back(MF); } } @@ -308,60 +295,39 @@ bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { SmallVector ModuleIDs = *Known; for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) { - if (ModuleFile *File = resolveModuleFile(ModuleIDs[I])) - Hits.insert(File); + if (ModuleFile *MF = Modules[ModuleIDs[I]].File) + Hits.insert(MF); } ++NumIdentifierLookupHits; return true; } -ModuleFile *GlobalModuleIndex::resolveModuleFile(unsigned ID) { - assert(ID < Modules.size() && "Out-of-bounds module index"); - - // If we already have a module file, return it. - if (Modules[ID].File) - return Modules[ID].File; - - // If we don't have a file name, or if there is no resolver, we can't - // resolve this. - if (Modules[ID].FileName.empty() || !Resolver) - return 0; - - // Try to resolve this module file. - ModuleFile *File; - if (Resolver->resolveModuleFileName(Modules[ID].FileName, Modules[ID].Size, - Modules[ID].ModTime, File)) { - // Clear out the module files for anything that depends on this module. - llvm::SmallVector Stack; - - Stack.push_back(ID); - while (!Stack.empty()) { - unsigned Victim = Stack.back(); - Stack.pop_back(); - - // Mark this file as ignored. - Modules[Victim].File = 0; - Modules[Victim].FileName.clear(); - - // Push any not-yet-ignored imported modules onto the stack. - for (unsigned I = 0, N = Modules[Victim].ImportedBy.size(); I != N; ++I) { - unsigned NextVictim = Modules[Victim].ImportedBy[I]; - if (!Modules[NextVictim].FileName.empty()) - Stack.push_back(NextVictim); - } - } - - return 0; +bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) { + // Look for the module in the global module index based on the module name. + StringRef Name = llvm::sys::path::stem(File->FileName); + llvm::StringMap::iterator Known = UnresolvedModules.find(Name); + if (Known == UnresolvedModules.end()) { + return true; } - // If we have a module file, record it. - if (File) { - Modules[ID].File = File; - ModulesByFile[File] = ID; + // Rectify this module with the global module index. + ModuleInfo &Info = Modules[Known->second]; + + // If the size and modification time match what we expected, record this + // module file. + bool Failed = true; + if (File->File->getSize() == Info.Size && + File->File->getModificationTime() == Info.ModTime) { + Info.File = File; + ModulesByFile[File] = Known->second; + + Failed = false; } - return File; + // One way or another, we have resolved this module file. + UnresolvedModules.erase(Known); + return Failed; } void GlobalModuleIndex::printStats() { diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 7384da540437..193a38b97384 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -166,17 +166,6 @@ void ModuleManager::addInMemoryBuffer(StringRef FileName, InMemoryBuffers[Entry] = Buffer; } -void ModuleManager::updateModulesInCommonWithGlobalIndex() { - ModulesInCommonWithGlobalIndex.clear(); - - if (!GlobalIndex) - return; - - // Collect the set of modules known to the global index. - GlobalIndex->noteAdditionalModulesLoaded(); - GlobalIndex->getKnownModules(ModulesInCommonWithGlobalIndex); -} - ModuleManager::VisitState *ModuleManager::allocateVisitState() { // Fast path: if we have a cached state, use it. if (FirstVisitState) { @@ -198,10 +187,25 @@ void ModuleManager::returnVisitState(VisitState *State) { void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) { GlobalIndex = Index; - if (GlobalIndex) { - GlobalIndex->setResolver(this); + if (!GlobalIndex) { + ModulesInCommonWithGlobalIndex.clear(); + return; } - updateModulesInCommonWithGlobalIndex(); + + // Notify the global module index about all of the modules we've already + // loaded. + for (unsigned I = 0, N = Chain.size(); I != N; ++I) { + if (!GlobalIndex->loadedModuleFile(Chain[I])) { + ModulesInCommonWithGlobalIndex.push_back(Chain[I]); + } + } +} + +void ModuleManager::moduleFileAccepted(ModuleFile *MF) { + if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF)) + return; + + ModulesInCommonWithGlobalIndex.push_back(MF); } ModuleManager::ModuleManager(FileManager &FileMgr) @@ -264,11 +268,6 @@ ModuleManager::visit(bool (*Visitor)(ModuleFile &M, void *UserData), assert(VisitOrder.size() == N && "Visitation order is wrong?"); - // We may need to update the set of modules we have in common with the - // global module index, since modules could have been added to the module - // manager since we loaded the global module index. - updateModulesInCommonWithGlobalIndex(); - delete FirstVisitState; FirstVisitState = 0; } @@ -387,34 +386,6 @@ bool ModuleManager::lookupModuleFile(StringRef FileName, return false; } -bool ModuleManager::resolveModuleFileName(StringRef FileName, - off_t ExpectedSize, - time_t ExpectedModTime, - ModuleFile *&File) { - File = 0; - - // Look for the file entry corresponding to this name. - const FileEntry *F; - if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, F)) - return true; - - // If there is no file, we've succeeded (trivially). - if (!F) - return false; - - // Determine whether we have a module file associated with this file entry. - llvm::DenseMap::iterator Known - = Modules.find(F); - if (Known == Modules.end()) { - // We don't know about this module file; invalidate the cache. - FileMgr.invalidateCache(F); - return false; - } - - File = Known->second; - return false; -} - #ifndef NDEBUG namespace llvm { template<>