forked from OSchip/llvm-project
parent
b788391260
commit
7eccf4ee0e
|
@ -21,6 +21,8 @@
|
|||
#include <vector>
|
||||
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
#include "llvm/Support/Mutex.h"
|
||||
|
||||
namespace llvm {
|
||||
class Function;
|
||||
|
@ -35,6 +37,7 @@ class LLVMContext;
|
|||
class Metadata;
|
||||
class MDTuple;
|
||||
class MDNode;
|
||||
class Module;
|
||||
|
||||
inline const char *getHotSectionPrefix() { return ".hot"; }
|
||||
inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }
|
||||
|
@ -67,6 +70,14 @@ private:
|
|||
// appears in the profile. The map is kept sorted in the descending order of
|
||||
// counts.
|
||||
std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies;
|
||||
// Compute profile summary for a module.
|
||||
static ProfileSummary *computeProfileSummary(Module *M);
|
||||
// Cache of last seen module and its profile summary.
|
||||
static ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>>
|
||||
CachedSummary;
|
||||
// Mutex to access summary cache
|
||||
static ManagedStatic<sys::SmartMutex<true>> CacheMutex;
|
||||
|
||||
protected:
|
||||
SummaryEntryVector DetailedSummary;
|
||||
std::vector<uint32_t> DetailedSummaryCutoffs;
|
||||
|
@ -85,14 +96,12 @@ protected:
|
|||
: PSK(K), DetailedSummary(DetailedSummary), TotalCount(TotalCount),
|
||||
MaxCount(MaxCount), MaxFunctionCount(MaxFunctionCount),
|
||||
NumCounts(NumCounts), NumFunctions(NumFunctions) {}
|
||||
~ProfileSummary() = default;
|
||||
inline void addCount(uint64_t Count);
|
||||
/// \brief Return metadata specific to the profile format.
|
||||
/// Derived classes implement this method to return a vector of Metadata.
|
||||
virtual std::vector<Metadata *> getFormatSpecificMD(LLVMContext &Context) = 0;
|
||||
/// \brief Return detailed summary as metadata.
|
||||
Metadata *getDetailedSummaryMD(LLVMContext &Context);
|
||||
|
||||
public:
|
||||
static const int Scale = 1000000;
|
||||
Kind getKind() const { return PSK; }
|
||||
|
@ -111,6 +120,10 @@ public:
|
|||
static ProfileSummary *getFromMD(Metadata *MD);
|
||||
uint32_t getNumFunctions() { return NumFunctions; }
|
||||
uint64_t getMaxFunctionCount() { return MaxFunctionCount; }
|
||||
/// \brief Get profile summary associated with module \p M
|
||||
static inline ProfileSummary *getProfileSummary(Module *M);
|
||||
virtual ~ProfileSummary() = default;
|
||||
virtual bool operator==(ProfileSummary &Other);
|
||||
};
|
||||
|
||||
class InstrProfSummary final : public ProfileSummary {
|
||||
|
@ -140,6 +153,7 @@ public:
|
|||
uint64_t getTotalCount() { return TotalCount; }
|
||||
uint64_t getMaxBlockCount() { return MaxCount; }
|
||||
uint64_t getMaxInternalBlockCount() { return MaxInternalBlockCount; }
|
||||
bool operator==(ProfileSummary &Other) override;
|
||||
};
|
||||
|
||||
class SampleProfileSummary final : public ProfileSummary {
|
||||
|
@ -180,5 +194,24 @@ SummaryEntryVector &ProfileSummary::getDetailedSummary() {
|
|||
return DetailedSummary;
|
||||
}
|
||||
|
||||
ProfileSummary *ProfileSummary::getProfileSummary(Module *M) {
|
||||
if (!M)
|
||||
return nullptr;
|
||||
sys::SmartScopedLock<true> Lock(*CacheMutex);
|
||||
// Computing profile summary for a module involves parsing a fairly large
|
||||
// metadata and could be expensive. We use a simple cache of the last seen
|
||||
// module and its profile summary.
|
||||
if (CachedSummary->first != M) {
|
||||
auto *Summary = computeProfileSummary(M);
|
||||
// Do not cache if the summary is empty. This is because a later pass
|
||||
// (sample profile loader, for example) could attach the summary metadata on
|
||||
// the module.
|
||||
if (!Summary)
|
||||
return nullptr;
|
||||
CachedSummary->first = M;
|
||||
CachedSummary->second.reset(Summary);
|
||||
}
|
||||
return CachedSummary->second.get();
|
||||
}
|
||||
} // end namespace llvm
|
||||
#endif
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
|
@ -630,10 +631,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
|
|||
// a well-tuned heuristic based on *callsite* hotness and not callee hotness.
|
||||
uint64_t FunctionCount = 0, MaxFunctionCount = 0;
|
||||
bool HasPGOCounts = false;
|
||||
if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
|
||||
ProfileSummary *PS = ProfileSummary::getProfileSummary(Callee.getParent());
|
||||
if (Callee.getEntryCount() && PS) {
|
||||
HasPGOCounts = true;
|
||||
FunctionCount = Callee.getEntryCount().getValue();
|
||||
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
|
||||
MaxFunctionCount = PS->getMaxFunctionCount();
|
||||
}
|
||||
|
||||
// Listen to the inlinehint attribute or profile based hotness information
|
||||
|
|
|
@ -19,4 +19,4 @@
|
|||
type = Library
|
||||
name = Analysis
|
||||
parent = Libraries
|
||||
required_libraries = Core Support
|
||||
required_libraries = Core ProfileData Support
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
|
@ -32,6 +33,10 @@ const std::vector<uint32_t> ProfileSummary::DefaultCutoffs(
|
|||
900000, 950000, 990000, 999000, 999900, 999990, 999999});
|
||||
const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"};
|
||||
|
||||
ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>>
|
||||
ProfileSummary::CachedSummary;
|
||||
ManagedStatic<sys::SmartMutex<true>> ProfileSummary::CacheMutex;
|
||||
|
||||
void InstrProfSummary::addRecord(const InstrProfRecord &R) {
|
||||
// The first counter is not necessarily an entry count for IR
|
||||
// instrumentation profiles.
|
||||
|
@ -86,6 +91,39 @@ void ProfileSummary::computeDetailedSummary() {
|
|||
}
|
||||
}
|
||||
|
||||
bool ProfileSummary::operator==(ProfileSummary &Other) {
|
||||
if (getKind() != Other.getKind())
|
||||
return false;
|
||||
if (TotalCount != Other.TotalCount)
|
||||
return false;
|
||||
if (MaxCount != Other.MaxCount)
|
||||
return false;
|
||||
if (MaxFunctionCount != Other.MaxFunctionCount)
|
||||
return false;
|
||||
if (NumFunctions != Other.NumFunctions)
|
||||
return false;
|
||||
if (NumCounts != Other.NumCounts)
|
||||
return false;
|
||||
std::vector<ProfileSummaryEntry> DS1 = getDetailedSummary();
|
||||
std::vector<ProfileSummaryEntry> DS2 = Other.getDetailedSummary();
|
||||
auto CompareSummaryEntry = [](ProfileSummaryEntry &E1,
|
||||
ProfileSummaryEntry &E2) {
|
||||
return E1.Cutoff == E2.Cutoff && E1.MinCount == E2.MinCount &&
|
||||
E1.NumCounts == E2.NumCounts;
|
||||
};
|
||||
if (!std::equal(DS1.begin(), DS1.end(), DS2.begin(), CompareSummaryEntry))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InstrProfSummary::operator==(ProfileSummary &Other) {
|
||||
InstrProfSummary *OtherIPS = dyn_cast<InstrProfSummary>(&Other);
|
||||
if (!OtherIPS)
|
||||
return false;
|
||||
return MaxInternalBlockCount == OtherIPS->MaxInternalBlockCount &&
|
||||
ProfileSummary::operator==(Other);
|
||||
}
|
||||
|
||||
// Returns true if the function is a hot function.
|
||||
bool ProfileSummary::isFunctionHot(const Function *F) {
|
||||
// FIXME: update when summary data is stored in module's metadata.
|
||||
|
@ -366,3 +404,9 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
|
|||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ProfileSummary *ProfileSummary::computeProfileSummary(Module *M) {
|
||||
if (Metadata *MD = M->getProfileSummary())
|
||||
return getFromMD(MD);
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; A callee with identical body does gets inlined because cost fits within the
|
||||
; inline-threshold
|
||||
|
||||
define i32 @callee1(i32 %x) !prof !1 {
|
||||
define i32 @callee1(i32 %x) !prof !21 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
|
@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
|
|||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @callee2(i32 %x) !prof !2 {
|
||||
define i32 @callee2(i32 %x) !prof !22 {
|
||||
; CHECK-LABEL: @callee2(
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
|
@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
|
|||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @caller2(i32 %y1) !prof !2 {
|
||||
define i32 @caller2(i32 %y1) !prof !22 {
|
||||
; CHECK-LABEL: @caller2(
|
||||
; CHECK: call i32 @callee2
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
|
@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 {
|
|||
ret i32 %y3
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
|
||||
!1 = !{!"function_entry_count", i64 100}
|
||||
!2 = !{!"function_entry_count", i64 1}
|
||||
!llvm.module.flags = !{!1}
|
||||
!21 = !{!"function_entry_count", i64 100}
|
||||
!22 = !{!"function_entry_count", i64 1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxBlockCount", i64 1000}
|
||||
!6 = !{!"MaxInternalBlockCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumBlocks", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12}
|
||||
!12 = !{i32 10000, i64 0, i32 0}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; A cold callee with identical body does not get inlined because cost exceeds the
|
||||
; inline-threshold
|
||||
|
||||
define i32 @callee1(i32 %x) !prof !1 {
|
||||
define i32 @callee1(i32 %x) !prof !20 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
|
@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
|
|||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @callee2(i32 %x) !prof !2 {
|
||||
define i32 @callee2(i32 %x) !prof !21 {
|
||||
; CHECK-LABEL: @callee2(
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
|
@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
|
|||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @caller2(i32 %y1) !prof !2 {
|
||||
define i32 @caller2(i32 %y1) !prof !21 {
|
||||
; CHECK-LABEL: @caller2(
|
||||
; CHECK: call i32 @callee2
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
|
@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 {
|
|||
ret i32 %y3
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"MaxFunctionCount", i32 10}
|
||||
!1 = !{!"function_entry_count", i64 10}
|
||||
!2 = !{!"function_entry_count", i64 1}
|
||||
!llvm.module.flags = !{!1}
|
||||
!20 = !{!"function_entry_count", i64 10}
|
||||
!21 = !{!"function_entry_count", i64 1}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxBlockCount", i64 10}
|
||||
!6 = !{!"MaxInternalBlockCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 10}
|
||||
!8 = !{!"NumBlocks", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12}
|
||||
!12 = !{i32 10000, i64 0, i32 0}
|
||||
|
|
|
@ -8,5 +8,6 @@ set(LLVM_LINK_COMPONENTS
|
|||
add_llvm_unittest(ProfileDataTests
|
||||
CoverageMappingTest.cpp
|
||||
InstrProfTest.cpp
|
||||
ProfileSummaryTest.cpp
|
||||
SampleProfTest.cpp
|
||||
)
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
//===- unittest/ProfileData/ProfileSummaryTest.cpp --------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace sampleprof;
|
||||
|
||||
struct ProfileSummaryTest : ::testing::Test {
|
||||
InstrProfSummary IPS;
|
||||
SampleProfileSummary SPS;
|
||||
|
||||
ProfileSummaryTest()
|
||||
: IPS({100000, 900000, 999999}), SPS({100000, 900000, 999999}) {}
|
||||
void SetUp() {
|
||||
InstrProfRecord Record1("func1", 0x1234, {97531, 5, 99999});
|
||||
InstrProfRecord Record2("func2", 0x1234, {57341, 10000, 10, 1});
|
||||
IPS.addRecord(Record1);
|
||||
IPS.addRecord(Record2);
|
||||
|
||||
IPS.computeDetailedSummary();
|
||||
|
||||
FunctionSamples FooSamples;
|
||||
FooSamples.addTotalSamples(7711);
|
||||
FooSamples.addHeadSamples(610);
|
||||
FooSamples.addBodySamples(1, 0, 610);
|
||||
FooSamples.addBodySamples(2, 0, 600);
|
||||
FooSamples.addBodySamples(4, 0, 60000);
|
||||
FooSamples.addBodySamples(8, 0, 60351);
|
||||
FooSamples.addBodySamples(10, 0, 605);
|
||||
|
||||
FunctionSamples BarSamples;
|
||||
BarSamples.addTotalSamples(20301);
|
||||
BarSamples.addHeadSamples(1437);
|
||||
BarSamples.addBodySamples(1, 0, 1437);
|
||||
|
||||
SPS.addRecord(FooSamples);
|
||||
SPS.addRecord(BarSamples);
|
||||
|
||||
SPS.computeDetailedSummary();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_F(ProfileSummaryTest, summary_from_module) {
|
||||
LLVMContext Context;
|
||||
Module M1("M1", Context);
|
||||
EXPECT_FALSE(ProfileSummary::getProfileSummary(&M1));
|
||||
M1.setProfileSummary(IPS.getMD(Context));
|
||||
EXPECT_TRUE(IPS == *ProfileSummary::getProfileSummary(&M1));
|
||||
|
||||
Module M2("M2", Context);
|
||||
EXPECT_FALSE(ProfileSummary::getProfileSummary(&M2));
|
||||
M2.setProfileSummary(SPS.getMD(Context));
|
||||
EXPECT_TRUE(SPS == *ProfileSummary::getProfileSummary(&M2));
|
||||
}
|
Loading…
Reference in New Issue