forked from OSchip/llvm-project
[ThinLTO] Add call edges' relative block frequency to per-module summary.
Summary: This allows the relative block frequency of call edges to be passed to the thin link stage, where it will be used to compute synthetic entry counts of functions. Reviewers: tejohnson, pcc Subscribers: mehdi_amini, llvm-commits, inglorion Differential Revision: https://reviews.llvm.org/D42212 llvm-svn: 323349
This commit is contained in:
parent
4bd8e5332f
commit
5f7aff9a0a
|
@ -256,6 +256,11 @@ enum GlobalValueSummarySymtabCodes {
|
|||
// strings in strtab.
|
||||
// [n * name]
|
||||
FS_CFI_FUNCTION_DECLS = 18,
|
||||
// Per-module summary that also adds relative block frequency to callee info.
|
||||
// PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs,
|
||||
// numrefs x valueid,
|
||||
// n x (valueid, relblockfreq)]
|
||||
FS_PERMODULE_RELBF = 19,
|
||||
};
|
||||
|
||||
enum MetadataCodes {
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
|
@ -54,14 +55,28 @@ struct CalleeInfo {
|
|||
Hot = 3,
|
||||
Critical = 4
|
||||
};
|
||||
HotnessType Hotness = HotnessType::Unknown;
|
||||
|
||||
CalleeInfo() = default;
|
||||
explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {}
|
||||
// The size of the bit-field might need to be adjusted if more values are
|
||||
// added to HotnessType enum.
|
||||
HotnessType Hotness : 3;
|
||||
uint32_t RelBlockFreq : 29;
|
||||
static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1;
|
||||
|
||||
// Default edge info: unknown hotness and a relative block frequency of zero.
CalleeInfo() : Hotness(HotnessType::Unknown), RelBlockFreq(0) {}
|
||||
// Builds edge info from a hotness level and a relative block frequency.
// RelBF is clamped to MaxRelBlockFreq so that a value wider than the 29-bit
// RelBlockFreq field saturates, as updateRelBlockFreq does, instead of being
// silently truncated when stored into the bit-field.
explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF)
    : Hotness(Hotness),
      // The uint64_t(...) temporary avoids odr-using the static constexpr
      // member through std::min's reference parameters.
      RelBlockFreq(static_cast<uint32_t>(
          std::min(RelBF, uint64_t(MaxRelBlockFreq)))) {}
|
||||
|
||||
// Keeps the larger of the stored hotness and OtherHotness, comparing by
// HotnessType enumerator value.
void updateHotness(const HotnessType OtherHotness) {
  if (Hotness < OtherHotness)
    Hotness = OtherHotness;
}
|
||||
|
||||
// When there are multiple edges between the same (caller, callee) pair, the
|
||||
// relative block frequencies are summed up.
|
||||
void updateRelBlockFreq(uint64_t RBF) {
|
||||
uint64_t Sum = SaturatingAdd<uint64_t>(RelBlockFreq, RBF);
|
||||
Sum = std::min(Sum, uint64_t(MaxRelBlockFreq));
|
||||
RelBlockFreq = static_cast<uint32_t>(Sum);
|
||||
}
|
||||
};
|
||||
|
||||
class GlobalValueSummary;
|
||||
|
|
|
@ -273,9 +273,24 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
|
|||
// to record the call edge to the alias in that case. Eventually
|
||||
// an alias summary will be created to associate the alias and
|
||||
// aliasee.
|
||||
CallGraphEdges[Index.getOrInsertValueInfo(
|
||||
cast<GlobalValue>(CalledValue))]
|
||||
.updateHotness(Hotness);
|
||||
auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
|
||||
cast<GlobalValue>(CalledValue))];
|
||||
ValueInfo.updateHotness(Hotness);
|
||||
// Add the relative block frequency to CalleeInfo if there is no profile
|
||||
// information.
|
||||
if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
|
||||
auto BBFreq = BFI->getBlockFreq(CI->getParent()).getFrequency();
|
||||
// FIXME: This might need some scaling to prevent BBFreq values from
|
||||
// being rounded down to 0.
|
||||
auto EntryFreq = BFI->getEntryFreq();
|
||||
// Block frequencies can be directly set for a block and so we need to
|
||||
// handle the case of entry frequency being 0.
|
||||
if (EntryFreq)
|
||||
BBFreq /= EntryFreq;
|
||||
else
|
||||
BBFreq = 0;
|
||||
ValueInfo.updateRelBlockFreq(BBFreq);
|
||||
}
|
||||
} else {
|
||||
// Skip inline assembly calls.
|
||||
if (CI && CI->isInlineAsm())
|
||||
|
|
|
@ -743,7 +743,8 @@ private:
|
|||
std::vector<ValueInfo> makeRefList(ArrayRef<uint64_t> Record);
|
||||
std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
|
||||
bool IsOldProfileFormat,
|
||||
bool HasProfile);
|
||||
bool HasProfile,
|
||||
bool HasRelBF);
|
||||
Error parseEntireSummary(unsigned ID);
|
||||
Error parseModuleStringTable();
|
||||
|
||||
|
@ -5047,12 +5048,15 @@ ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef<uint64_t> Record) {
|
|||
return Ret;
|
||||
}
|
||||
|
||||
std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallList(
|
||||
ArrayRef<uint64_t> Record, bool IsOldProfileFormat, bool HasProfile) {
|
||||
std::vector<FunctionSummary::EdgeTy>
|
||||
ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef<uint64_t> Record,
|
||||
bool IsOldProfileFormat,
|
||||
bool HasProfile, bool HasRelBF) {
|
||||
std::vector<FunctionSummary::EdgeTy> Ret;
|
||||
Ret.reserve(Record.size());
|
||||
for (unsigned I = 0, E = Record.size(); I != E; ++I) {
|
||||
CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
|
||||
uint64_t RelBF = 0;
|
||||
ValueInfo Callee = getValueInfoFromValueId(Record[I]).first;
|
||||
if (IsOldProfileFormat) {
|
||||
I += 1; // Skip old callsitecount field
|
||||
|
@ -5060,7 +5064,9 @@ std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallLi
|
|||
I += 1; // Skip old profilecount field
|
||||
} else if (HasProfile)
|
||||
Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
|
||||
Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}});
|
||||
else if (HasRelBF)
|
||||
RelBF = Record[++I];
|
||||
Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)});
|
||||
}
|
||||
return Ret;
|
||||
}
|
||||
|
@ -5139,7 +5145,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
|||
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, fflags, numrefs,
|
||||
// numrefs x valueid,
|
||||
// n x (valueid, hotness)]
|
||||
// FS_PERMODULE_RELBF: [valueid, flags, instcount, fflags, numrefs,
|
||||
// numrefs x valueid,
|
||||
// n x (valueid, relblockfreq)]
|
||||
case bitc::FS_PERMODULE:
|
||||
case bitc::FS_PERMODULE_RELBF:
|
||||
case bitc::FS_PERMODULE_PROFILE: {
|
||||
unsigned ValueID = Record[0];
|
||||
uint64_t RawFlags = Record[1];
|
||||
|
@ -5165,9 +5175,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
|||
std::vector<ValueInfo> Refs = makeRefList(
|
||||
ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
|
||||
bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE);
|
||||
bool HasRelBF = (BitCode == bitc::FS_PERMODULE_RELBF);
|
||||
std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
|
||||
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
|
||||
IsOldProfileFormat, HasProfile);
|
||||
IsOldProfileFormat, HasProfile, HasRelBF);
|
||||
auto FS = llvm::make_unique<FunctionSummary>(
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
|
||||
std::move(Calls), std::move(PendingTypeTests),
|
||||
|
@ -5259,7 +5270,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
|
|||
bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE);
|
||||
std::vector<FunctionSummary::EdgeTy> Edges = makeCallList(
|
||||
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
|
||||
IsOldProfileFormat, HasProfile);
|
||||
IsOldProfileFormat, HasProfile, false);
|
||||
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
|
||||
auto FS = llvm::make_unique<FunctionSummary>(
|
||||
Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
|
||||
|
|
|
@ -86,6 +86,9 @@ static cl::opt<unsigned>
|
|||
cl::desc("Number of metadatas above which we emit an index "
|
||||
"to enable lazy-loading"));
|
||||
|
||||
// Hidden option, off by default. When it is set and HasProfileData is false in
// writePerModuleFunctionSummaryRecord, the writer emits FS_PERMODULE_RELBF
// records that carry each call edge's RelBlockFreq after the callee value id.
cl::opt<bool> WriteRelBFToSummary(
|
||||
"write-relbf-to-summary", cl::Hidden, cl::init(false),
|
||||
cl::desc("Write relative block frequency to function summary "));
|
||||
namespace {
|
||||
|
||||
/// These are manifest constants used by the bitcode writer. They do not need to
|
||||
|
@ -3378,11 +3381,15 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
|
|||
NameVals.push_back(getValueId(ECI.first));
|
||||
if (HasProfileData)
|
||||
NameVals.push_back(static_cast<uint8_t>(ECI.second.Hotness));
|
||||
else if (WriteRelBFToSummary)
|
||||
NameVals.push_back(ECI.second.RelBlockFreq);
|
||||
}
|
||||
|
||||
unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
|
||||
unsigned Code =
|
||||
(HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE);
|
||||
(HasProfileData ? bitc::FS_PERMODULE_PROFILE
|
||||
: (WriteRelBFToSummary ? bitc::FS_PERMODULE_RELBF
|
||||
: bitc::FS_PERMODULE));
|
||||
|
||||
// Emit the finished record.
|
||||
Stream.EmitRecord(Code, NameVals, FSAbbrev);
|
||||
|
@ -3448,21 +3455,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
|||
ArrayRef<uint64_t>{GVI.second, GVI.first});
|
||||
}
|
||||
|
||||
// Abbrev for FS_PERMODULE.
|
||||
auto Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
|
||||
// numrefs x valueid, n x (valueid)
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
// Abbrev for FS_PERMODULE_PROFILE.
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
auto Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
|
||||
|
@ -3474,6 +3468,22 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
|||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
// Abbrev for FS_PERMODULE or FS_PERMODULE_RELBF.
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
if (WriteRelBFToSummary)
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_RELBF));
|
||||
else
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
|
||||
// numrefs x valueid, n x (valueid [, rel_block_freq])
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS));
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
; Test to check that -write-relbf-to-summary emits the call graph with relative block frequencies (PERMODULE_RELBF) in the summary
|
||||
; RUN: opt -write-relbf-to-summary -module-summary %s -o %t.o
|
||||
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
|
||||
|
||||
|
||||
; CHECK: <SOURCE_FILENAME
|
||||
; CHECK-NEXT: <GLOBALVAR
|
||||
; CHECK-NEXT: <FUNCTION
|
||||
; "func"
|
||||
; CHECK-NEXT: <FUNCTION op0=17 op1=4
|
||||
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
|
||||
; CHECK-NEXT: <VERSION
|
||||
; See if the call to func is registered.
|
||||
; CHECK-NEXT: <PERMODULE_RELBF {{.*}} op4=1 {{.*}} op7=1
|
||||
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
|
||||
; CHECK: <STRTAB_BLOCK
|
||||
; CHECK-NEXT: blob data = 'undefinedglobmainfunc{{.*}}'
|
||||
|
||||
|
||||
; ModuleID = 'thinlto-function-summary-callgraph.ll'
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
; main makes a single call to the external @func from its entry block and
; loads @undefinedglob; the PERMODULE_RELBF CHECK line above matches the
; resulting summary record for this function.
define i32 @main() #0 {
|
||||
entry:
|
||||
call void (...) @func()
|
||||
%u = load i32, i32* @undefinedglob
|
||||
ret i32 %u
|
||||
}
|
||||
|
||||
declare void @func(...) #1
|
||||
@undefinedglob = external global i32
|
||||
|
||||
; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls)
|
|
@ -306,6 +306,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
|
|||
return nullptr;
|
||||
STRINGIFY_CODE(FS, PERMODULE)
|
||||
STRINGIFY_CODE(FS, PERMODULE_PROFILE)
|
||||
STRINGIFY_CODE(FS, PERMODULE_RELBF)
|
||||
STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
|
||||
STRINGIFY_CODE(FS, COMBINED)
|
||||
STRINGIFY_CODE(FS, COMBINED_PROFILE)
|
||||
|
|
Loading…
Reference in New Issue