[PGO] Fix profile mismatch in COMDAT function with pre-inliner

Pre-instrumentation inline (pre-inliner) greatly improves the IR
instrumentation code performance, among other benefits. One issue of the
pre-inliner is it can introduce CFG-mismatch for COMDAT functions. This
is due to the fact that the same COMDAT function may have different early
inline decisions across different modules -- that means different copies
of COMDAT functions will have different CFG checksum.

In this patch, we propose a partially renaming the COMDAT group and its
member function/variable so we have different profile counter for each
version. We will post-fix the COMDAT function and the group name with its
FunctionHash.

Differential Revision: http://reviews.llvm.org/D22600

llvm-svn: 276673
This commit is contained in:
Rong Xu 2016-07-25 18:45:37 +00:00
parent 9a89b15aa2
commit 705f7775bb
5 changed files with 210 additions and 26 deletions

View File

@ -51,6 +51,7 @@
#include "llvm/Transforms/PGOInstrumentation.h"
#include "CFGMST.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@ -59,6 +60,7 @@
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@ -75,6 +77,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
@ -112,6 +115,13 @@ static cl::opt<unsigned> MaxNumAnnotations(
cl::desc("Max number of annotations for a single indirect "
"call callsite"));
// Command line option to control appending FunctionHash to the name of a COMDAT
// function. This is to avoid the hash mismatch caused by the preinliner.
static cl::opt<bool> DoComdatRenaming(
"do-comdat-renaming", cl::init(true), cl::Hidden,
cl::desc("Append function hash to the name of COMDAT function to avoid "
"function hash mismatch due to the preinliner"));
// Command line option to enable/disable the warning about missing profile
// information.
static cl::opt<bool> NoPGOWarnMissing("no-pgo-warn-missing", cl::init(false),
@ -238,6 +248,9 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
private:
Function &F;
void computeCFGHash();
void renameComdatFunction();
// A map that stores the Comdat group in function F.
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
public:
std::string FuncName;
@ -261,12 +274,17 @@ public:
Twine(FunctionHash) + "\t" + Str);
}
FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr)
: F(Func), FunctionHash(0), MST(F, BPI, BFI) {
FuncPGOInstrumentation(
Function &Func,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr)
: F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
MST(F, BPI, BFI) {
FuncName = getPGOFuncName(F);
computeCFGHash();
if (ComdatMembers.size())
renameComdatFunction();
DEBUG(dumpInfo("after CFGMST"));
NumOfPGOBB += MST.BBInfos.size();
@ -299,7 +317,88 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
}
JC.update(Indexes);
FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
(uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
}
// Check if we can safely rename this Comdat function.
static bool canRenameComdat(
Function &F,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
if (F.getName().empty())
return false;
if (!needsComdatForCounter(F, *(F.getParent())))
return false;
// Only safe to do if this function may be discarded if it is not used
// in the compilation unit.
if (!GlobalValue::isDiscardableIfUnused(F.getLinkage()))
return false;
// For AvailableExternallyLinkage functions.
if (!F.hasComdat()) {
assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
return true;
}
// FIXME: Current only handle those Comdat groups that only containing one
// function and function aliases.
// (1) For a Comdat group containing multiple functions, we need to have a
// unique postfix based on the hashes for each function. There is a
// non-trivial code refactoring to do this efficiently.
// (2) Variables can not be renamed, so we can not rename Comdat function in a
// group including global vars.
Comdat *C = F.getComdat();
for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
if (dyn_cast<GlobalAlias>(CM.second))
continue;
Function *FM = dyn_cast<Function>(CM.second);
if (FM != &F)
return false;
}
return true;
}
// Append the CFGHash to the Comdat function name.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
if (!canRenameComdat(F, ComdatMembers))
return;
std::string NewFuncName =
Twine(F.getName() + "." + Twine(FunctionHash)).str();
F.setName(Twine(NewFuncName));
FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
Comdat *NewComdat;
Module *M = F.getParent();
// For AvailableExternallyLinkage functions, change the linkage to
// LinkOnceODR and put them into comdat. This is because after renaming, there
// is no backup external copy available for the function.
if (!F.hasComdat()) {
assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
F.setLinkage(GlobalValue::LinkOnceODRLinkage);
F.setComdat(NewComdat);
return;
}
// This function belongs to a single function Comdat group.
Comdat *OrigComdat = F.getComdat();
std::string NewComdatName =
Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
// For aliases, change the name directly.
assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
continue;
}
// Must be a function.
Function *CF = dyn_cast<Function>(CM.second);
assert(CF);
CF->setComdat(NewComdat);
}
}
// Given a CFG E to be instrumented, find which BB to place the instrumented
@ -340,16 +439,16 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
// Visit all edge and instrument the edges not in MST, and do value profiling.
// Critical edges will be split.
static void instrumentOneFunc(Function &F, Module *M,
BranchProbabilityInfo *BPI,
BlockFrequencyInfo *BFI) {
static void instrumentOneFunc(
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
unsigned NumCounters = 0;
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
BFI);
for (auto &E : FuncInfo.MST.AllEdges) {
if (!E->InMST && !E->Removed)
NumCounters++;
}
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto &E : FuncInfo.MST.AllEdges) {
@ -456,9 +555,11 @@ static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
class PGOUseFunc {
public:
PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
PGOUseFunc(Function &Func, Module *Modu,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr)
: F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI),
: F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI),
FreqAttr(FFA_Normal) {}
// Read counts for the instrumented BB from profile.
@ -479,6 +580,8 @@ public:
// Return the function hotness from the profile.
FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
// Return the function hash.
uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
// Return the profile record for this function;
InstrProfRecord &getProfileRecord() { return ProfileRecord; }
@ -802,16 +905,37 @@ static void createIRLevelProfileFlagVariable(Module &M) {
StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
}
// Collect the set of members for each Comdat in module M and store
// in ComdatMembers.
static void collectComdatMembers(
Module &M,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
if (!DoComdatRenaming)
return;
for (Function &F : M)
if (Comdat *C = F.getComdat())
ComdatMembers.insert(std::make_pair(C, &F));
for (GlobalVariable &GV : M.globals())
if (Comdat *C = GV.getComdat())
ComdatMembers.insert(std::make_pair(C, &GV));
for (GlobalAlias &GA : M.aliases())
if (Comdat *C = GA.getComdat())
ComdatMembers.insert(std::make_pair(C, &GA));
}
static bool InstrumentAllFunctions(
Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
createIRLevelProfileFlagVariable(M);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
for (auto &F : M) {
if (F.isDeclaration())
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
instrumentOneFunc(F, &M, BPI, BFI);
instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
}
return true;
}
@ -877,6 +1001,8 @@ static bool annotateAllFunctions(
return false;
}
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
std::vector<Function *> ColdFunctions;
for (auto &F : M) {
@ -884,7 +1010,7 @@ static bool annotateAllFunctions(
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
PGOUseFunc Func(F, &M, BPI, BFI);
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
if (!Func.readCounters(PGOReader.get()))
continue;
Func.populateCounters();
@ -910,7 +1036,6 @@ static bool annotateAllFunctions(
F->addFnAttr(llvm::Attribute::Cold);
DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n");
}
return true;
}

View File

@ -1,7 +1,7 @@
:ir
bar
# Func Hash:
12884901887
281487861612543
# Num Counters:
1
# Counter Values:

View File

@ -4,19 +4,19 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
$foo = comdat any
; CHECK: $foo.[[FOO_HASH:[0-9]+]] = comdat any
; CHECK: $__llvm_profile_raw_version = comdat any
; CHECK: $__profv__stdin__foo = comdat any
; CHECK: $__profv__stdin__foo.[[FOO_HASH]] = comdat any
@bar = global i32 ()* @foo, align 8
; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; CHECK: @__profn__stdin__foo = private constant [11 x i8] c"<stdin>:foo"
; CHECK: @__profc__stdin__foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo), align 8
; CHECK: @__profd__stdin__foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo, i32 0, i32 0), i8*
; CHECK: @__profn__stdin__foo.[[FOO_HASH]] = private constant [23 x i8] c"<stdin>:foo.[[FOO_HASH]]"
; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8
; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6965568665848889497, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null
; CHECK-NOT: bitcast (i32 ()* @foo to i8*)
; CHECK-SAME: null
; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo), align 8
; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8
; CHECK: @__llvm_prf_nm
; CHECK: @llvm.used

View File

@ -0,0 +1,59 @@
; RUN: opt < %s -mtriple=x86_64-unknown-linux -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,ELFONLY %s
; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s
; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=pgo-instr-gen -S | FileCheck --check-prefixes COMMON,COFFONLY %s
; Rename Comdat group and its function.
$f = comdat any
; COMMON: $f.[[SINGLEBB_HASH:[0-9]+]] = comdat any
define linkonce_odr void @f() comdat($f) {
ret void
}
; Not rename Comdat with right linkage.
$nf = comdat any
; COMMON: $nf = comdat any
define void @nf() comdat($nf) {
ret void
}
; Not rename Comdat with variable members.
$f_with_var = comdat any
; COMMON: $f_with_var = comdat any
@var = global i32 0, comdat($f_with_var)
define linkonce_odr void @f_with_var() comdat($f_with_var) {
%tmp = load i32, i32* @var, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* @var, align 4
ret void
}
; Not rename Comdat with multiple functions.
$tf = comdat any
; COMMON: $tf = comdat any
define linkonce void @tf() comdat($tf) {
ret void
}
define linkonce void @tf2() comdat($tf) {
ret void
}
; Renaming Comdat with aliases.
$f_with_alias = comdat any
; COMMON: $f_with_alias.[[SINGLEBB_HASH]] = comdat any
@af = alias void (...), bitcast (void ()* @f_with_alias to void (...)*)
; COFFONLY: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to
; ELFONLY-DAG: @af.[[SINGLEBB_HASH]] = alias void (...), bitcast (void ()* @f_with_alias.[[SINGLEBB_HASH]] to
define linkonce_odr void @f_with_alias() comdat($f_with_alias) {
ret void
}
; Rename AvailableExternallyLinkage functions
; ELFONLY-DAG: $aef.[[SINGLEBB_HASH]] = comdat any
define available_externally void @aef() {
; ELFONLY: define linkonce_odr void @aef.[[SINGLEBB_HASH]]() comdat {
; COFFONLY: define available_externally void @aef() {
ret void
}

View File

@ -13,10 +13,10 @@ $foo3 = comdat any
define void @foo() {
entry:
; GEN: entry:
; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH:[0-9]+]], i32 1, i32 0)
%tmp = load void ()*, void ()** @bar, align 8
; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i64 [[ICALL_TARGET]], i32 0, i32 0)
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 [[FOO_HASH]], i64 [[ICALL_TARGET]], i32 0, i32 0)
call void %tmp()
ret void
}
@ -30,7 +30,7 @@ bb:
invoke void %tmp2()
to label %bb10 unwind label %bb2
; GEN: [[ICALL_TARGET2:%[0-9]+]] = ptrtoint void ()* %tmp2 to i64
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 38432627612, i64 [[ICALL_TARGET2]], i32 0, i32 0)
; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 [[FOO2_HASH:[0-9]+]], i64 [[ICALL_TARGET2]], i32 0, i32 0)
bb2: ; preds = %bb
%tmp3 = landingpad { i8*, i32 }
@ -54,7 +54,7 @@ bb11: ; preds = %bb2
}
; Test that comdat function's address is recorded.
; LOWER: @__profd_foo3 = linkonce_odr{{.*}}@foo3
; LOWER: @__profd_foo3.[[FOO3_HASH:[0-9]+]] = linkonce_odr{{.*}}@foo3.[[FOO3_HASH]]
; Function Attrs: nounwind uwtable
define linkonce_odr i32 @foo3() comdat {
ret i32 1