From 705f7775bb6c1863da2cbde59270d545653f00d6 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Mon, 25 Jul 2016 18:45:37 +0000 Subject: [PATCH] [PGO] Fix profile mismatch in COMDAT function with pre-inliner Pre-instrumentation inline (pre-inliner) greatly improves the IR instrumentation code performance, among other benefits. One issue of the pre-inliner is it can introduce CFG-mismatch for COMDAT functions. This is due to the fact that the same COMDAT function may have different early inline decisions across different modules -- that means different copies of COMDAT functions will have different CFG checksum. In this patch, we propose a partially renaming the COMDAT group and its member function/variable so we have different profile counter for each version. We will post-fix the COMDAT function and the group name with its FunctionHash. Differential Revision: http://reviews.llvm.org/D22600 llvm-svn: 276673 --- .../Instrumentation/PGOInstrumentation.cpp | 155 ++++++++++++++++-- .../PGOProfile/Inputs/indirect_call.proftext | 2 +- .../Transforms/PGOProfile/comdat_internal.ll | 12 +- .../Transforms/PGOProfile/comdat_rename.ll | 59 +++++++ .../PGOProfile/indirect_call_profile.ll | 8 +- 5 files changed, 210 insertions(+), 26 deletions(-) create mode 100644 llvm/test/Transforms/PGOProfile/comdat_rename.ll diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index f9ebde46e77e..9b95a25ef9b6 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/PGOInstrumentation.h" #include "CFGMST.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -59,6 +60,7 @@ #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" @@ -75,6 +77,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include +#include #include #include @@ -112,6 +115,13 @@ static cl::opt MaxNumAnnotations( cl::desc("Max number of annotations for a single indirect " "call callsite")); +// Command line option to control appending FunctionHash to the name of a COMDAT +// function. This is to avoid the hash mismatch caused by the preinliner. +static cl::opt DoComdatRenaming( + "do-comdat-renaming", cl::init(true), cl::Hidden, + cl::desc("Append function hash to the name of COMDAT function to avoid " + "function hash mismatch due to the preinliner")); + // Command line option to enable/disable the warning about missing profile // information. static cl::opt NoPGOWarnMissing("no-pgo-warn-missing", cl::init(false), @@ -238,6 +248,9 @@ template class FuncPGOInstrumentation { private: Function &F; void computeCFGHash(); + void renameComdatFunction(); + // A map that stores the Comdat group in function F. + std::unordered_multimap &ComdatMembers; public: std::string FuncName; @@ -261,12 +274,17 @@ public: Twine(FunctionHash) + "\t" + Str); } - FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false, - BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), FunctionHash(0), MST(F, BPI, BFI) { + FuncPGOInstrumentation( + Function &Func, + std::unordered_multimap &ComdatMembers, + bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr) + : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0), + MST(F, BPI, BFI) { FuncName = getPGOFuncName(F); computeCFGHash(); + if (ComdatMembers.size()) + renameComdatFunction(); DEBUG(dumpInfo("after CFGMST")); NumOfPGOBB += MST.BBInfos.size(); @@ -299,7 +317,88 @@ void FuncPGOInstrumentation::computeCFGHash() { } } JC.update(Indexes); - FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 | + (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); +} + +// Check if we can safely rename this Comdat function. +static bool canRenameComdat( + Function &F, + std::unordered_multimap &ComdatMembers) { + if (F.getName().empty()) + return false; + if (!needsComdatForCounter(F, *(F.getParent()))) + return false; + // Only safe to do if this function may be discarded if it is not used + // in the compilation unit. + if (!GlobalValue::isDiscardableIfUnused(F.getLinkage())) + return false; + + // For AvailableExternallyLinkage functions. + if (!F.hasComdat()) { + assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); + return true; + } + + // FIXME: Current only handle those Comdat groups that only containing one + // function and function aliases. + // (1) For a Comdat group containing multiple functions, we need to have a + // unique postfix based on the hashes for each function. There is a + // non-trivial code refactoring to do this efficiently. + // (2) Variables can not be renamed, so we can not rename Comdat function in a + // group including global vars. + Comdat *C = F.getComdat(); + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { + if (dyn_cast(CM.second)) + continue; + Function *FM = dyn_cast(CM.second); + if (FM != &F) + return false; + } + return true; +} + +// Append the CFGHash to the Comdat function name. +template +void FuncPGOInstrumentation::renameComdatFunction() { + if (!canRenameComdat(F, ComdatMembers)) + return; + std::string NewFuncName = + Twine(F.getName() + "." + Twine(FunctionHash)).str(); + F.setName(Twine(NewFuncName)); + FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); + Comdat *NewComdat; + Module *M = F.getParent(); + // For AvailableExternallyLinkage functions, change the linkage to + // LinkOnceODR and put them into comdat. This is because after renaming, there + // is no backup external copy available for the function. + if (!F.hasComdat()) { + assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); + NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); + F.setLinkage(GlobalValue::LinkOnceODRLinkage); + F.setComdat(NewComdat); + return; + } + + // This function belongs to a single function Comdat group. + Comdat *OrigComdat = F.getComdat(); + std::string NewComdatName = + Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); + NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); + NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); + + for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { + if (GlobalAlias *GA = dyn_cast(CM.second)) { + // For aliases, change the name directly. + assert(dyn_cast(GA->getAliasee()->stripPointerCasts()) == &F); + GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); + continue; + } + // Must be a function. + Function *CF = dyn_cast(CM.second); + assert(CF); + CF->setComdat(NewComdat); + } } // Given a CFG E to be instrumented, find which BB to place the instrumented @@ -340,16 +439,16 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { // Visit all edge and instrument the edges not in MST, and do value profiling. // Critical edges will be split. -static void instrumentOneFunc(Function &F, Module *M, - BranchProbabilityInfo *BPI, - BlockFrequencyInfo *BFI) { +static void instrumentOneFunc( + Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, + std::unordered_multimap &ComdatMembers) { unsigned NumCounters = 0; - FuncPGOInstrumentation FuncInfo(F, true, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, + BFI); for (auto &E : FuncInfo.MST.AllEdges) { if (!E->InMST && !E->Removed) NumCounters++; } - uint32_t I = 0; Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); for (auto &E : FuncInfo.MST.AllEdges) { @@ -456,9 +555,11 @@ static uint64_t sumEdgeCount(const ArrayRef Edges) { class PGOUseFunc { public: - PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr, + PGOUseFunc(Function &Func, Module *Modu, + std::unordered_multimap &ComdatMembers, + BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI), + : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), FreqAttr(FFA_Normal) {} // Read counts for the instrumented BB from profile. @@ -479,6 +580,8 @@ public: // Return the function hotness from the profile. FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } + // Return the function hash. + uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } // Return the profile record for this function; InstrProfRecord &getProfileRecord() { return ProfileRecord; } @@ -802,16 +905,37 @@ static void createIRLevelProfileFlagVariable(Module &M) { StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); } +// Collect the set of members for each Comdat in module M and store +// in ComdatMembers. +static void collectComdatMembers( + Module &M, + std::unordered_multimap &ComdatMembers) { + if (!DoComdatRenaming) + return; + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); +} + static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, function_ref LookupBFI) { createIRLevelProfileFlagVariable(M); + std::unordered_multimap ComdatMembers; + collectComdatMembers(M, ComdatMembers); + for (auto &F : M) { if (F.isDeclaration()) continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); } return true; } @@ -877,6 +1001,8 @@ static bool annotateAllFunctions( return false; } + std::unordered_multimap ComdatMembers; + collectComdatMembers(M, ComdatMembers); std::vector HotFunctions; std::vector ColdFunctions; for (auto &F : M) { @@ -884,7 +1010,7 @@ static bool annotateAllFunctions( continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - PGOUseFunc Func(F, &M, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); if (!Func.readCounters(PGOReader.get())) continue; Func.populateCounters(); @@ -910,7 +1036,6 @@ static bool annotateAllFunctions( F->addFnAttr(llvm::Attribute::Cold); DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); } - return true; } diff --git a/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext b/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext index 269d85c5fd91..d453090d1c58 100644 --- a/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext @@ -1,7 +1,7 @@ :ir bar # Func Hash: -12884901887 +281487861612543 # Num Counters: 1 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll index 8cc41bf50068..25dafbea1035 100644 --- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -4,19 +4,19 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" $foo = comdat any +; CHECK: $foo.[[FOO_HASH:[0-9]+]] = comdat any ; CHECK: $__llvm_profile_raw_version = comdat any -; CHECK: $__profv__stdin__foo = comdat any +; CHECK: $__profv__stdin__foo.[[FOO_HASH]] = comdat any @bar = global i32 ()* @foo, align 8 ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat -; CHECK: @__profn__stdin__foo = private constant [11 x i8] c":foo" -; CHECK: @__profc__stdin__foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo), align 8 -; CHECK: @__profd__stdin__foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo, i32 0, i32 0), i8* +; CHECK: @__profn__stdin__foo.[[FOO_HASH]] = private constant [23 x i8] c":foo.[[FOO_HASH]]" +; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 +; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6965568665848889497, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) -; CHECK-SAME: null -; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo), align 8 +; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.used diff --git a/llvm/test/Transforms/PGOProfile/comdat_rename.ll b/llvm/test/Transforms/PGOProfile/comdat_rename.ll new file mode 100644 index 000000000000..63bb367fda25 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/comdat_rename.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s +; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s +; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s + +; Rename Comdat group and its function. +$f = comdat any +; COMMON: $f.[[SINGLEBB_HASH:[0-9]+]] = comdat any +define linkonce_odr void @f() comdat($f) { + ret void +} + +; Not rename Comdat with right linkage. +$nf = comdat any +; COMMON: $nf = comdat any +define void @nf() comdat($nf) { + ret void +} + +; Not rename Comdat with variable members. +$f_with_var = comdat any +; COMMON: $f_with_var = comdat any +@var = global i32 0, comdat($f_with_var) +define linkonce_odr void @f_with_var() comdat($f_with_var) { + %tmp = load i32, i32* @var, align 4 + %inc = add nsw i32 %tmp, 1 + store i32 %inc, i32* @var, align 4 + ret void +} + +; Not rename Comdat with multiple functions. +$tf = comdat any +; COMMON: $tf = comdat any +define linkonce void @tf() comdat($tf) { + ret void +} +define linkonce void @tf2() comdat($tf) { + ret void +} + +; Renaming Comdat with aliases. +$f_with_alias = comdat any +; COMMON: $f_with_alias.[[SINGLEBB_HASH]] = comdat any +@af = alias void (...), bitcast (void ()* @f_with_alias to void (...)*) +; COFFONLY: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to +; ELFONLY-DAG: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to +define linkonce_odr void @f_with_alias() comdat($f_with_alias) { + ret void +} + +; Rename AvailableExternallyLinkage functions +; ELFONLY-DAG: $aef.[[SINGLEBB_HASH]] = comdat any +define available_externally void @aef() { +; ELFONLY: define linkonce_odr void @aef.[[SINGLEBB_HASH]]() comdat { +; COFFONLY: define available_externally void @aef() { + ret void +} + + diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll b/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll index e1753acb7c74..409c29ef8728 100644 --- a/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll +++ b/llvm/test/Transforms/PGOProfile/indirect_call_profile.ll @@ -13,10 +13,10 @@ $foo3 = comdat any define void @foo() { entry: ; GEN: entry: -; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0) +; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH:[0-9]+]], i32 1, i32 0) %tmp = load void ()*, void ()** @bar, align 8 ; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64 -; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i64 [[ICALL_TARGET]], i32 0, i32 0) +; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0) call void %tmp() ret void } @@ -30,7 +30,7 @@ bb: invoke void %tmp2() to label %bb10 unwind label %bb2 ; GEN: [[ICALL_TARGET2:%[0-9]+]] = ptrtoint void ()* %tmp2 to i64 -; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 38432627612, i64 [[ICALL_TARGET2]], i32 0, i32 0) +; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 [[FOO2_HASH:[0-9]+]], i64 [[ICALL_TARGET2]], i32 0, i32 0) bb2: ; preds = %bb %tmp3 = landingpad { i8*, i32 } @@ -54,7 +54,7 @@ bb11: ; preds = %bb2 } ; Test that comdat function's address is recorded. -; LOWER: @__profd_foo3 = linkonce_odr{{.*}}@foo3 +; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@foo3.[[FOO3_HASH]] ; Function Attrs: nounwind uwtable define linkonce_odr i32 @foo3() comdat { ret i32 1