From 407870995773f469b3ba9189f1fce2415d97ba65 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 13 Apr 2016 04:20:32 +0000 Subject: [PATCH] Refactor Internalization pass to use as a callback instead of a StringSet (NFC) This will save a bunch of copies / initialization of intermediate datastructure, and (hopefully) simplify the code. This also abstract the symbol preservation mechanism outside of the Internalization pass into the client code, which is not forced to keep a map of strings for instance (ThinLTO will prefere hashes). From: Mehdi Amini llvm-svn: 266163 --- llvm/include/llvm/Transforms/IPO.h | 11 +- llvm/lib/LTO/LTOCodeGenerator.cpp | 18 ++- llvm/lib/LTO/LTOInternalize.cpp | 47 +++---- llvm/lib/LTO/LTOInternalize.h | 11 +- llvm/lib/Transforms/IPO/IPO.cpp | 8 +- llvm/lib/Transforms/IPO/Internalize.cpp | 170 +++++++++++++----------- 6 files changed, 145 insertions(+), 120 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index ba3529fd40ea..0bf0c306c19e 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -19,6 +19,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include + namespace llvm { class ModuleSummaryIndex; @@ -120,17 +122,16 @@ Pass *createPruneEHPass(); /// createInternalizePass - This pass loops over all of the functions in the /// input module, internalizing all globals (functions and variables) it can. //// -/// The symbols in \p ExportList are never internalized. +/// Before internalizing a symbol, the callback \p MustPreserveGV is invoked and +/// gives to the client the ability to prevent internalizing specific symbols. /// /// The symbol in DSOList are internalized if it is safe to drop them from /// the symbol table. /// /// Note that commandline options that are used with the above function are not /// used now! -ModulePass *createInternalizePass(StringSet<> ExportList); - -/// Same as above, but with an exportList created for an array. -ModulePass *createInternalizePass(ArrayRef ExportList); +ModulePass * +createInternalizePass(std::function MustPreserveGV); /// createInternalizePass - Same as above, but with an empty exportList. ModulePass *createInternalizePass(); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index fa9d1bb4c4ad..f81a39dac1ba 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -337,8 +337,22 @@ void LTOCodeGenerator::applyScopeRestrictions() { if (ScopeRestrictionsDone || !ShouldInternalize) return; - LTOInternalize(*MergedModule, *TargetMach, MustPreserveSymbols, - AsmUndefinedRefs, + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. + Mangler Mangler; + SmallString<64> MangledName; + auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { + // Need to mangle the GV as the "MustPreserveSymbols" StringSet is filled + // with the linker supplied name, which on Darwin includes a leading + // underscore. + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mangler::getNameWithPrefix(MangledName, GV.getName(), + MergedModule->getDataLayout()); + return MustPreserveSymbols.count(MangledName); + }; + + LTOInternalize(*MergedModule, *TargetMach, MustPreserveGV, AsmUndefinedRefs, (ShouldRestoreGlobalsLinkage ? &ExternalSymbols : nullptr)); ScopeRestrictionsDone = true; diff --git a/llvm/lib/LTO/LTOInternalize.cpp b/llvm/lib/LTO/LTOInternalize.cpp index 74619d2d8b77..c3c95e7b1e94 100644 --- a/llvm/lib/LTO/LTOInternalize.cpp +++ b/llvm/lib/LTO/LTOInternalize.cpp @@ -23,31 +23,26 @@ using namespace llvm; namespace { - -class ComputePreserveList { +// Helper class that populate the array of symbols used in inlined assembly. +class ComputeAsmUsed { public: - ComputePreserveList(const StringSet<> &MustPreserveSymbols, - const StringSet<> &AsmUndefinedRefs, - const TargetMachine &TM, const Module &TheModule, - StringMap *ExternalSymbols, - std::vector &MustPreserveList, - SmallPtrSetImpl &AsmUsed) - : MustPreserveSymbols(MustPreserveSymbols), - AsmUndefinedRefs(AsmUndefinedRefs), TM(TM), - ExternalSymbols(ExternalSymbols), MustPreserveList(MustPreserveList), - AsmUsed(AsmUsed) { + ComputeAsmUsed(const StringSet<> &AsmUndefinedRefs, const TargetMachine &TM, + const Module &TheModule, + StringMap *ExternalSymbols, + SmallPtrSetImpl &AsmUsed) + : AsmUndefinedRefs(AsmUndefinedRefs), TM(TM), + ExternalSymbols(ExternalSymbols), AsmUsed(AsmUsed) { accumulateAndSortLibcalls(TheModule); for (const Function &F : TheModule) - applyRestriction(F); + findAsmUses(F); for (const GlobalVariable &GV : TheModule.globals()) - applyRestriction(GV); + findAsmUses(GV); for (const GlobalAlias &GA : TheModule.aliases()) - applyRestriction(GA); + findAsmUses(GA); } private: // Inputs - const StringSet<> &MustPreserveSymbols; const StringSet<> &AsmUndefinedRefs; const TargetMachine &TM; @@ -57,7 +52,6 @@ private: // Output StringMap *ExternalSymbols; - std::vector &MustPreserveList; SmallPtrSetImpl &AsmUsed; // Collect names of runtime library functions. User-defined functions with the @@ -97,7 +91,7 @@ private: Libcalls.end()); } - void applyRestriction(const GlobalValue &GV) { + void findAsmUses(const GlobalValue &GV) { // There are no restrictions to apply to declarations. if (GV.isDeclaration()) return; @@ -109,8 +103,6 @@ private: SmallString<64> Buffer; TM.getNameWithPrefix(Buffer, &GV, Mangler); - if (MustPreserveSymbols.count(Buffer)) - MustPreserveList.push_back(GV.getName().data()); if (AsmUndefinedRefs.count(Buffer)) AsmUsed.insert(&GV); @@ -146,18 +138,14 @@ static void findUsedValues(GlobalVariable *LLVMUsed, UsedValues.insert(GV); } +// mark which symbols can not be internalized void llvm::LTOInternalize( Module &TheModule, const TargetMachine &TM, - const StringSet<> &MustPreserveSymbols, const StringSet<> &AsmUndefinedRefs, + const std::function &MustPreserveSymbols, + const StringSet<> &AsmUndefinedRefs, StringMap *ExternalSymbols) { - legacy::PassManager passes; - // mark which symbols can not be internalized - Mangler Mangler; - std::vector MustPreserveList; SmallPtrSet AsmUsed; - - ComputePreserveList(MustPreserveSymbols, AsmUndefinedRefs, TM, TheModule, - ExternalSymbols, MustPreserveList, AsmUsed); + ComputeAsmUsed(AsmUndefinedRefs, TM, TheModule, ExternalSymbols, AsmUsed); GlobalVariable *LLVMCompilerUsed = TheModule.getGlobalVariable("llvm.compiler.used"); @@ -182,7 +170,8 @@ void llvm::LTOInternalize( LLVMCompilerUsed->setSection("llvm.metadata"); } - passes.add(createInternalizePass(MustPreserveList)); + legacy::PassManager passes; + passes.add(createInternalizePass(MustPreserveSymbols)); // apply scope restrictions passes.run(TheModule); diff --git a/llvm/lib/LTO/LTOInternalize.h b/llvm/lib/LTO/LTOInternalize.h index ebe5a6ffcd09..6d79866fdbaf 100644 --- a/llvm/lib/LTO/LTOInternalize.h +++ b/llvm/lib/LTO/LTOInternalize.h @@ -17,14 +17,17 @@ #include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" +#include + namespace llvm { class Module; class TargetMachine; -void LTOInternalize(Module &TheModule, const TargetMachine &TM, - const StringSet<> &MustPreserveSymbols, - const StringSet<> &AsmUndefinedRefs, - StringMap *ExternalSymbols); +void LTOInternalize( + Module &TheModule, const TargetMachine &TM, + const std::function &MustPreserveSymbols, + const StringSet<> &AsmUndefinedRefs, + StringMap *ExternalSymbols); } #endif // LLVM_LTO_LTOINTERNALIZE_H diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 10f3365aeb73..7e0106064291 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -106,10 +106,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { } void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { - std::vector Export; - if (AllButMain) - Export.push_back("main"); - unwrap(PM)->add(createInternalizePass(Export)); + auto PreserveMain = [=](const GlobalValue &GV) { + return AllButMain && GV.getName() == "main"; + }; + unwrap(PM)->add(createInternalizePass(PreserveMain)); } void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index a57176f75b02..f2b848b98ff9 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -54,19 +54,84 @@ APIList("internalize-public-api-list", cl::value_desc("list"), cl::CommaSeparated); namespace { - class InternalizePass : public ModulePass { +// Helper to load an API list to preserve from file and expose it as a functor +// to the Internalize Pass +class PreserveAPIList { +public: + PreserveAPIList() { + if (!APIFile.empty()) + LoadFile(APIFile); + ExternalNames.insert(APIList.begin(), APIList.end()); + } + + bool operator()(const GlobalValue &GV) { + return ExternalNames.count(GV.getName()); + } + +private: + // Contains the set of symbols loaded from file StringSet<> ExternalNames; - public: - static char ID; // Pass identification, replacement for typeid - explicit InternalizePass(); - explicit InternalizePass(ArrayRef ExportList); - explicit InternalizePass(StringSet<> ExportList); - void LoadFile(const char *Filename); + void LoadFile(StringRef Filename) { + // Load the APIFile... + std::ifstream In(Filename.data()); + if (!In.good()) { + errs() << "WARNING: Internalize couldn't load file '" << Filename + << "'! Continuing as if it's empty.\n"; + return; // Just continue as if the file were empty + } + while (In) { + std::string Symbol; + In >> Symbol; + if (!Symbol.empty()) + ExternalNames.insert(Symbol); + } + } +}; +} + +namespace { +class InternalizePass : public ModulePass { + // Client supply callback to control wheter a symbol must be preserved. + std::function MustPreserveGV; + + // Set of symbols private to the compiler that this pass should not touch. + StringSet<> AlwaysPreserved; + + // Return false if we're allowed to internalize this GV. + bool ShouldPreserveGV(const GlobalValue &GV) { + // Function must be defined here + if (GV.isDeclaration()) + return true; + + // Available externally is really just a "declaration with a body". + if (GV.hasAvailableExternallyLinkage()) + return true; + + // Assume that dllexported symbols are referenced elsewhere + if (GV.hasDLLExportStorageClass()) + return true; + + // Already local, has nothing to do. + if (GV.hasLocalLinkage()) + return false; + + // Check some special cases + if (AlwaysPreserved.count(GV.getName())) + return true; + + return MustPreserveGV(GV); + } + bool maybeInternalize(GlobalValue &GV, const std::set &ExternalComdats); void checkComdatVisibility(GlobalValue &GV, std::set &ExternalComdats); + + public: + static char ID; // Pass identification, replacement for typeid + explicit InternalizePass(); + InternalizePass(std::function MustPreserveGV); bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -80,60 +145,13 @@ char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", "Internalize Global Symbols", false, false) -InternalizePass::InternalizePass() : ModulePass(ID) { +InternalizePass::InternalizePass() + : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {} + +InternalizePass::InternalizePass( + std::function MustPreserveGV) + : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) { initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - if (!APIFile.empty()) // If a filename is specified, use it. - LoadFile(APIFile.c_str()); - ExternalNames.insert(APIList.begin(), APIList.end()); -} - -InternalizePass::InternalizePass(ArrayRef ExportList) - : ModulePass(ID) { - initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - for(ArrayRef::const_iterator itr = ExportList.begin(); - itr != ExportList.end(); itr++) { - ExternalNames.insert(*itr); - } -} - -InternalizePass::InternalizePass(StringSet<> ExportList) - : ModulePass(ID), ExternalNames(std::move(ExportList)) {} - -void InternalizePass::LoadFile(const char *Filename) { - // Load the APIFile... - std::ifstream In(Filename); - if (!In.good()) { - errs() << "WARNING: Internalize couldn't load file '" << Filename - << "'! Continuing as if it's empty.\n"; - return; // Just continue as if the file were empty - } - while (In) { - std::string Symbol; - In >> Symbol; - if (!Symbol.empty()) - ExternalNames.insert(Symbol); - } -} - -static bool isExternallyVisible(const GlobalValue &GV, - const StringSet<> &ExternalNames) { - // Function must be defined here - if (GV.isDeclaration()) - return true; - - // Available externally is really just a "declaration with a body". - if (GV.hasAvailableExternallyLinkage()) - return true; - - // Assume that dllexported symbols are referenced elsewhere - if (GV.hasDLLExportStorageClass()) - return true; - - // Marked to keep external? - if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) - return true; - - return false; } // Internalize GV if it is possible to do so, i.e. it is not externally visible @@ -154,7 +172,7 @@ bool InternalizePass::maybeInternalize( if (GV.hasLocalLinkage()) return false; - if (isExternallyVisible(GV, ExternalNames)) + if (ShouldPreserveGV(GV)) return false; } @@ -171,7 +189,7 @@ void InternalizePass::checkComdatVisibility( if (!C) return; - if (isExternallyVisible(GV, ExternalNames)) + if (ShouldPreserveGV(GV)) ExternalComdats.insert(C); } @@ -204,7 +222,7 @@ bool InternalizePass::runOnModule(Module &M) { // conservative, we internalize symbols in llvm.compiler.used, but we // keep llvm.compiler.used so that the symbol is not deleted by llvm. for (GlobalValue *V : Used) { - ExternalNames.insert(V->getName()); + AlwaysPreserved.insert(V->getName()); } // Mark all functions not in the api as internal. @@ -223,20 +241,20 @@ bool InternalizePass::runOnModule(Module &M) { // Never internalize the llvm.used symbol. It is used to implement // attribute((used)). // FIXME: Shouldn't this just filter on llvm.metadata section?? - ExternalNames.insert("llvm.used"); - ExternalNames.insert("llvm.compiler.used"); + AlwaysPreserved.insert("llvm.used"); + AlwaysPreserved.insert("llvm.compiler.used"); // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.global_ctors"); - ExternalNames.insert("llvm.global_dtors"); - ExternalNames.insert("llvm.global.annotations"); + AlwaysPreserved.insert("llvm.global_ctors"); + AlwaysPreserved.insert("llvm.global_dtors"); + AlwaysPreserved.insert("llvm.global.annotations"); // Never internalize symbols code-gen inserts. // FIXME: We should probably add this (and the __stack_chk_guard) via some // type of call-back in CodeGen. - ExternalNames.insert("__stack_chk_fail"); - ExternalNames.insert("__stack_chk_guard"); + AlwaysPreserved.insert("__stack_chk_fail"); + AlwaysPreserved.insert("__stack_chk_guard"); // Mark all global variables with initializers that are not in the api as // internal as well. @@ -268,12 +286,12 @@ bool InternalizePass::runOnModule(Module &M) { return true; } -ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } - -ModulePass *llvm::createInternalizePass(ArrayRef ExportList) { - return new InternalizePass(ExportList); +ModulePass *llvm::createInternalizePass() { + return new InternalizePass(PreserveAPIList()); } -ModulePass *llvm::createInternalizePass(StringSet<> ExportList) { - return new InternalizePass(std::move(ExportList)); +ModulePass *llvm::createInternalizePass( + std::function MustPreserveGV) { + return new InternalizePass(std::move(MustPreserveGV)); } +