[CSSPGO] Introducing distribution factor for pseudo probe.

Sample re-annotation is required at LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by pre-LTO code duplication done by passes such as loop unrolling, jump threading, indirect call promotion, etc., where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we introduce the concept of a distribution factor for pseudo probes, so that the samples of a duplicated probe can be distributed among its copies, each scaled by its factor. We rely on the optimizations that duplicate code to maintain the block frequency information (BFI) well, since probe distribution factors are calculated from it. Distribution factors are updated at the end of the pre-LTO pipeline to reflect an estimated portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pseudo probe carries a factor whose value ranges from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit DWARF discriminator, so a 7-bit distribution factor is used for them instead.

Changes are also needed in the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by pre-LTO passes, when later inlined at LTO time, should have the callee's probes prorated based on the prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites, and the original samples should be distributed across them. This is handled by adjusting the callsites' distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264
This commit is contained in:
Hongtao Yu 2020-12-11 12:18:31 -08:00
parent d395007216
commit 3d89b3cbec
20 changed files with 595 additions and 74 deletions

View File

@ -6,12 +6,12 @@ void bar();
void go();
void foo(int x) {
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
if (x == 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
bar();
else
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
go();
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
}

View File

@ -981,12 +981,16 @@ public:
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
}
ConstantInt *getIndex() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
}
ConstantInt *getAttributes() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
}
ConstantInt *getIndex() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
ConstantInt *getFactor() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
};

View File

@ -1299,7 +1299,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int
// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
// optimizer as having opaque side effects so that it won't be get rid of or moved
// out of the block it probes.
def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
// Intrinsics to support half precision floating point format

View File

@ -16,28 +16,39 @@
#include "llvm/ADT/Optional.h"
#include <cassert>
#include <cstdint>
#include <limits>
namespace llvm {
class Instruction;
class BasicBlock;
constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
// The saturated distribution factor representing 100% for block probes.
constexpr static uint64_t PseudoProbeFullDistributionFactor =
std::numeric_limits<uint64_t>::max();
struct PseudoProbeDwarfDiscriminator {
public:
// The following APIs encodes/decodes per-probe information to/from a
// 32-bit integer which is organized as:
// [2:0] - 0x7, this is reserved for regular discriminator,
// see DWARF discriminator encoding rule
// [18:3] - probe id
// [25:19] - reserved
// [25:19] - probe distribution factor
// [28:26] - probe type, see PseudoProbeType
// [31:29] - reserved for probe attributes
// Packs per-probe information into a 32-bit pseudo-probe discriminator using
// the bit layout documented above:
//   Index  -> bits [18:3]  (must fit in 16 bits)
//   Factor -> bits [25:19] (an integral percentage, 0..100)
//   Type   -> bits [28:26] (see PseudoProbeType)
//   Flags  -> bits [31:29] (probe attributes)
// The low three bits are always set to 0x7.
static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
                              uint32_t Factor) {
  assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
  assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
  assert(Flags <= 0x7 && "Probe attributes too big to encode, exceeding 7");
  assert(Factor <= 100 &&
         "Probe distribution factor too big to encode, exceeding 100");
  // Flags must be written into [31:29] so that a value obtained through
  // extractProbeAttributes() survives an extract/re-pack round trip; leaving
  // it out would silently reset the attributes to zero.
  return (Index << 3) | (Factor << 19) | (Type << 26) | (Flags << 29) | 0x7;
}
static uint32_t extractProbeIndex(uint32_t Value) {
@ -51,16 +62,26 @@ struct PseudoProbeDwarfDiscriminator {
// Returns the probe attribute bits, i.e. bits [31:29] of the discriminator.
static uint32_t extractProbeAttributes(uint32_t Value) {
  const uint32_t Shifted = Value >> 29;
  return Shifted & 0x7;
}
// Returns the 7-bit distribution factor stored in bits [25:19] of the
// discriminator.
static uint32_t extractProbeFactor(uint32_t Value) {
  const uint32_t Shifted = Value >> 19;
  return Shifted & 0x7F;
}
// The saturated distribution factor representing 100% for callsites.
constexpr static uint8_t FullDistributionFactor = 100;
};
// Decoded description of a pseudo probe, as produced by extractProbe().
struct PseudoProbe {
// Probe index: the intrinsic's index operand for block probes, or the index
// field of the discriminator for callsite probes.
uint32_t Id;
// Probe kind, stored as the numeric value of a PseudoProbeType.
uint32_t Type;
// Probe attribute bits.
uint32_t Attr;
// Distribution factor as a fraction: the stored integral factor divided by
// the corresponding saturated ("full") factor.
float Factor;
};
Optional<PseudoProbe> extractProbe(const Instruction &Inst);
void setProbeDistributionFactor(Instruction &Inst, float Factor);
} // end namespace llvm
#endif // LLVM_IR_PSEUDOPROBE_H

View File

@ -22,6 +22,7 @@
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include <string>
#include <utility>
@ -273,6 +274,7 @@ class StandardInstrumentations {
OptBisectInstrumentation OptBisect;
PreservedCFGCheckerInstrumentation PreservedCFGChecker;
IRChangedPrinter PrintChangedIR;
PseudoProbeVerifier PseudoProbeVerification;
VerifyInstrumentation Verify;
bool VerifyEach;

View File

@ -347,6 +347,16 @@ public:
return SortedTargets;
}
/// Build a copy of \p Targets in which every target's sample count has been
/// multiplied by \p DistributionFactor. The input map is left untouched.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
                                             float DistributionFactor) {
  CallTargetMap Prorated;
  for (const auto &Entry : Targets) {
    const auto TargetName = Entry.first();
    // The product is computed in floating point and converted back to the
    // map's integral count type on assignment.
    Prorated[TargetName] = Entry.second * DistributionFactor;
  }
  return Prorated;
}
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {

View File

@ -16,6 +16,10 @@
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
@ -29,6 +33,8 @@ class Module;
using namespace sampleprof;
using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
using ProbeFactorMap = std::unordered_map<uint64_t, float>;
using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
@ -43,6 +49,33 @@ public:
uint64_t getFunctionHash() const { return FunctionHash; }
};
// A pseudo probe verifier that can be run after each IR pass to detect
// violations in the updating of probe factors. In principle, the sum of the
// distribution factors for a probe should be identical before and after a
// pass. For a function pass, the factor sum for a probe would typically be
// 100%.
class PseudoProbeVerifier {
public:
// Hooks the verifier into the pass instrumentation callbacks.
void registerCallbacks(PassInstrumentationCallbacks &PIC);
// Implementation of pass instrumentation callbacks for new pass manager.
// Dispatches on the kind of IR unit wrapped in IR.
void runAfterPass(StringRef PassID, Any IR);
private:
// Allow a little bias due to the rounding to integral factors.
constexpr static float DistributionFactorVariance = 0.02;
// Distribution factors from last pass, keyed by function name and then by
// probe id.
FuncProbeFactorMap FunctionProbeFactors;
// Adds the factor of every probe found in BB to ProbeFactors.
void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
// Per-IR-unit entry points; each one ends up verifying individual functions.
void runAfterPass(const Module *M);
void runAfterPass(const LazyCallGraph::SCC *C);
void runAfterPass(const Function *F);
void runAfterPass(const Loop *L);
// Returns true if F is eligible for verification.
bool shouldVerifyFunction(const Function *F);
// Compares ProbeFactors against the factors recorded for F by the previous
// run, reports the differences, and records the new factors.
void verifyProbeFactors(const Function *F,
const ProbeFactorMap &ProbeFactors);
};
// This class serves sample counts correlation for SampleProfileLoader by
// analyzing pseudo probes and their function descriptors injected by
// SampleProfileProber.
@ -102,5 +135,13 @@ public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
// Module pass that refreshes the distribution factor of every pseudo probe.
class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
// Recomputes the probe distribution factors of a single function.
void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
public:
PseudoProbeUpdatePass() {}
// Pass entry point: processes the function definitions of M.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H

View File

@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
Probe.Attr =
PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
Probe.Factor =
PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
(float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
return Probe;
}
}
@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
Probe.Id = II->getIndex()->getZExtValue();
Probe.Type = (uint32_t)PseudoProbeType::Block;
Probe.Attr = II->getAttributes()->getZExtValue();
Probe.Factor = II->getFactor()->getZExtValue() /
(float)PseudoProbeFullDistributionFactor;
return Probe;
}
@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
return None;
}
/// Update the distribution factor of the pseudo probe carried by \p Inst.
///
/// \p Factor must be in [0, 1.0]. Two kinds of probes are handled:
///  - block probes (PseudoProbeInst): the factor is scaled to the 64-bit
///    range [0, PseudoProbeFullDistributionFactor] and stored in the
///    intrinsic's factor operand;
///  - callsite probes (non-intrinsic calls whose debug location carries a
///    pseudo-probe discriminator): the factor is scaled to an integral
///    percentage and re-packed into the discriminator.
/// Any other instruction is left untouched.
void setProbeDistributionFactor(Instruction &Inst, float Factor) {
  assert(Factor >= 0 && Factor <= 1 &&
         "Distribution factor must be in [0, 1.0]");
  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
    // Only multiply for partial factors so that 1.0 keeps the exact
    // saturated value instead of going through a lossy float round trip.
    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
    if (Factor < 1)
      IntFactor *= Factor;
    auto OrigFactor = II->getFactor()->getZExtValue();
    if (IntFactor != OrigFactor) {
      IRBuilder<> Builder(&Inst);
      // Rewrite the factor argument (operand 3, see getFactor()) by position.
      // replaceUsesOfWith() would also rewrite the GUID or index operand if
      // it happened to be the same uniqued i64 constant as the old factor.
      II->setArgOperand(3, Builder.getInt64(IntFactor));
    }
  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
      const DILocation *DIL = DLoc;
      auto Discriminator = DIL->getDiscriminator();
      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
        // Keep every other field of the discriminator as it is.
        auto Index =
            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
        auto Type =
            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
            Discriminator);
        uint32_t IntFactor =
            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
        // The conversion back to an integer truncates, so small factors are
        // rounded down to 0 to avoid over-counting.
        if (Factor < 1)
          IntFactor *= Factor;
        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
            Index, Type, Attr, IntFactor);
        DIL = DIL->cloneWithDiscriminator(V);
        Inst.setDebugLoc(DIL);
      }
    }
  }
}
} // namespace llvm

View File

@ -1428,6 +1428,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@ -1482,6 +1485,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
if (PTO.Coroutines)
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);

View File

@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS

View File

@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks(
OptBisect.registerCallbacks(PIC);
PreservedCFGChecker.registerCallbacks(PIC);
PrintChangedIR.registerCallbacks(PIC);
PseudoProbeVerification.registerCallbacks(PIC);
if (VerifyEach)
Verify.registerCallbacks(PIC);
}

View File

@ -108,6 +108,8 @@ STATISTIC(NumCSNotInlined,
STATISTIC(NumMismatchedProfile,
"Number of functions with CFG mismatched profile");
STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
STATISTIC(NumDuplicatedInlinesite,
"Number of inlined callsites with a partial distribution factor");
STATISTIC(NumCSInlinedHitMinLimit,
"Number of functions with FDO inline stopped due to min size limit");
@ -358,7 +360,14 @@ private:
struct InlineCandidate {
CallBase *CallInstr;
const FunctionSamples *CalleeSamples;
// Prorated callsite count, which will be used to guide inlining. For example,
// if a callsite is duplicated in LTO prelink, then in LTO postlink the two
// copies will get their own distribution factors and their prorated counts
// will be used to decide if they should be inlined independently.
uint64_t CallsiteCount;
// Call site distribution factor to prorate the profile samples for a
// duplicated callsite. Default value is 1.0.
float CallsiteDistribution;
};
// Inline candidate comparer using call site weight
@ -418,8 +427,8 @@ protected:
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
// Attempt to promote indirect call and also inline the promoted call
bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t &Sum,
DenseSet<Instruction *> &PromotedInsns,
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@ -886,7 +895,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
if (R) {
uint64_t Samples = R.get();
uint64_t Samples = R.get() * Probe->Factor;
bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
if (FirstMark) {
ORE->emit([&]() {
@ -894,13 +903,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
Remark << "Applied " << ore::NV("NumSamples", Samples);
Remark << " samples from profile (ProbeId=";
Remark << ore::NV("ProbeId", Probe->Id);
Remark << ", Factor=";
Remark << ore::NV("Factor", Probe->Factor);
Remark << ", OriginalSamples=";
Remark << ore::NV("OriginalSamples", R.get());
Remark << ")";
return Remark;
});
}
LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
<< " - weight: " << R.get() << ")\n");
<< " - weight: " << R.get() << " - factor: "
<< format("%0.2f", Probe->Factor) << ")\n");
return Samples;
}
return R;
@ -1085,7 +1098,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// \param InlinedCallSite Output vector for new call sites exposed after
/// inlining.
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t &Sum,
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
DenseSet<Instruction *> &PromotedInsns,
SmallVector<CallBase *, 8> *InlinedCallSite) {
const char *Reason = "Callee function not available";
@ -1106,10 +1119,28 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Candidate.CallsiteCount, Sum, false, ORE);
if (DI) {
Sum -= Candidate.CallsiteCount;
// Prorate the indirect callsite distribution.
// Do not update the promoted direct callsite distribution at this
// point since the original distribution combined with the callee
// profile will be used to prorate callsites from the callee if
// inlined. Once not inlined, the direct callsite distribution should
// be prorated so that the it will reflect the real callsite counts.
setProbeDistributionFactor(*Candidate.CallInstr,
Candidate.CallsiteDistribution * Sum /
SumOrigin);
PromotedInsns.insert(Candidate.CallInstr);
Candidate.CallInstr = DI;
if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
return tryInlineCandidate(Candidate, InlinedCallSite);
if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
if (!Inlined) {
// Prorate the direct callsite distribution so that it reflects real
// callsite counts.
setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
Candidate.CallsiteCount /
SumOrigin);
}
return Inlined;
}
}
} else {
LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
@ -1216,11 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
InlineCandidate Candidate = {I,
LocalNotInlinedCallSites.count(I)
? LocalNotInlinedCallSites[I]
: nullptr,
0 /* dummy count */};
InlineCandidate Candidate = {
I,
LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
: nullptr,
0 /* dummy count */, 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@ -1229,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions(
continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());
@ -1237,8 +1269,9 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!callsiteIsHot(FS, PSI))
continue;
Candidate = {I, FS, FS->getEntrySamples()};
if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
Candidate = {I, FS, FS->getEntrySamples(), 1.0};
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
PromotedInsns)) {
LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
}
@ -1343,6 +1376,23 @@ bool SampleProfileLoader::tryInlineCandidate(
if (ProfileIsCS)
ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
++NumCSInlined;
// Prorate inlined probes for a duplicated inlining callsite which probably
// has a distribution less than 100%. Samples for an inlinee should be
// distributed among the copies of the original callsite based on each
// callsite's distribution factor for counts accuracy. Note that an inlined
// probe may come with its own distribution factor if it has been duplicated
// in the inlinee body. The two factor are multiplied to reflect the
// aggregation of duplication.
if (Candidate.CallsiteDistribution < 1) {
for (auto &I : IFI.InlinedCallSites) {
if (Optional<PseudoProbe> Probe = extractProbe(*I))
setProbeDistributionFactor(*I, Probe->Factor *
Candidate.CallsiteDistribution);
}
NumDuplicatedInlinesite++;
}
return true;
}
return false;
@ -1360,14 +1410,19 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
if (!CalleeSamples)
return false;
float Factor = 1.0;
if (Optional<PseudoProbe> Probe = extractProbe(*CB))
Factor = Probe->Factor;
uint64_t CallsiteCount = 0;
ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
if (Weight)
CallsiteCount = Weight.get();
if (CalleeSamples)
CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
CallsiteCount = std::max(
CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
*NewCandidate = {CB, CalleeSamples, CallsiteCount};
*NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
@ -1479,6 +1534,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
uint64_t Sum;
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
uint64_t SumOrigin = Sum;
Sum *= Candidate.CallsiteDistribution;
for (const auto *FS : CalleeSamples) {
// TODO: Consider disable pre-lTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@ -1486,7 +1542,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
PSI->getOrCompHotCountThreshold());
continue;
}
uint64_t EntryCountDistributed = FS->getEntrySamples();
uint64_t EntryCountDistributed =
FS->getEntrySamples() * Candidate.CallsiteDistribution;
// In addition to regular inline cost check, we also need to make sure
// ICP isn't introducing excessive speculative checks even if individual
// target looks beneficial to promote and inline. That means we should
@ -1505,9 +1562,10 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
SmallVector<CallBase *, 8> InlinedCallSites;
// Attach function profile for promoted indirect callee, and update
// call site count for the promoted inline candidate too.
Candidate = {I, FS, EntryCountDistributed};
if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
&InlinedCallSites)) {
Candidate = {I, FS, EntryCountDistributed,
Candidate.CallsiteDistribution};
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
PromotedInsns, &InlinedCallSites)) {
for (auto *CB : InlinedCallSites) {
if (getInlineCandidate(&NewCandidate, CB))
CQueue.emplace(NewCandidate);
@ -1965,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) {
auto T = FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
// Prorate the callsite counts to reflect what is already done to the
// callsite, such as ICP or calliste cloning.
if (FunctionSamples::ProfileIsProbeBased) {
if (Optional<PseudoProbe> Probe = extractProbe(I)) {
if (Probe->Factor < 1)
T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
}
}
SmallVector<InstrProfValueData, 2> SortedCallTargets =
GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;

View File

@ -12,6 +12,7 @@
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@ -25,8 +26,10 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <unordered_set>
#include <vector>
using namespace llvm;
@ -35,6 +38,115 @@ using namespace llvm;
STATISTIC(ArtificialDbgLine,
"Number of probes that have an artificial debug line");
static cl::opt<bool>
VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
cl::desc("Do pseudo probe verification"));
static cl::list<std::string> VerifyPseudoProbeFuncList(
"verify-pseudo-probe-funcs", cl::Hidden,
cl::desc("The option to specify the name of the functions to verify."));
static cl::opt<bool>
UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
cl::desc("Update pseudo probe distribution factor"));
// Decide whether F takes part in pseudo probe verification.
bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
  // Declarations are skipped, and so are available_externally functions:
  // those will not be emitted into the object file, and the prevailing
  // definition will be verified instead.
  if (F->isDeclaration() || F->hasAvailableExternallyLinkage())
    return false;

  // The name filter is built once from the command-line list.
  static std::unordered_set<std::string> VerifyFuncNames(
      VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());

  // An empty filter selects every function.
  if (VerifyFuncNames.empty())
    return true;
  return VerifyFuncNames.count(F->getName().str()) != 0;
}
// Install the after-pass verification callback, but only when verification
// has been requested through the VerifyPseudoProbe option.
void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
  if (!VerifyPseudoProbe)
    return;

  PIC.registerAfterPassCallback(
      [this](StringRef P, Any IR, const PreservedAnalyses &) {
        runAfterPass(P, IR);
      });
}
// After-pass callback for the new pass manager: prints a banner naming the
// pass, then forwards the wrapped IR unit to the matching overload.
void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
  dbgs() << ("\n*** Pseudo Probe Verification After " + PassID.str() +
             " ***\n");

  if (any_isa<const Module *>(IR)) {
    runAfterPass(any_cast<const Module *>(IR));
    return;
  }
  if (any_isa<const Function *>(IR)) {
    runAfterPass(any_cast<const Function *>(IR));
    return;
  }
  if (any_isa<const LazyCallGraph::SCC *>(IR)) {
    runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
    return;
  }
  if (any_isa<const Loop *>(IR)) {
    runAfterPass(any_cast<const Loop *>(IR));
    return;
  }
  llvm_unreachable("Unknown IR unit");
}
// Module unit: verify each function of the module in turn.
void PseudoProbeVerifier::runAfterPass(const Module *M) {
  for (const Function &Func : M->functions()) {
    runAfterPass(&Func);
  }
}
// SCC unit: verify the function behind every node of the SCC.
void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
  for (const LazyCallGraph::Node &CGNode : *C) {
    const Function &Func = CGNode.getFunction();
    runAfterPass(&Func);
  }
}
void PseudoProbeVerifier::runAfterPass(const Function *F) {
if (!shouldVerifyFunction(F))
return;
ProbeFactorMap ProbeFactors;
for (const auto &BB : *F)
collectProbeFactors(&BB, ProbeFactors);
verifyProbeFactors(F, ProbeFactors);
}
void PseudoProbeVerifier::runAfterPass(const Loop *L) {
const Function *F = L->getHeader()->getParent();
runAfterPass(F);
}
void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
ProbeFactorMap &ProbeFactors) {
for (const auto &I : *Block) {
if (Optional<PseudoProbe> Probe = extractProbe(I))
ProbeFactors[Probe->Id] += Probe->Factor;
}
}
void PseudoProbeVerifier::verifyProbeFactors(
const Function *F, const ProbeFactorMap &ProbeFactors) {
bool BannerPrinted = false;
auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
for (const auto &I : ProbeFactors) {
float CurProbeFactor = I.second;
if (PrevProbeFactors.count(I.first)) {
float PrevProbeFactor = PrevProbeFactors[I.first];
if (std::abs(CurProbeFactor - PrevProbeFactor) >
DistributionFactorVariance) {
if (!BannerPrinted) {
dbgs() << "Function " << F->getName() << ":\n";
BannerPrinted = true;
}
dbgs() << "Probe " << I.first << "\tprevious factor "
<< format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
<< format("%0.2f", CurProbeFactor) << "\n";
}
}
// Update
PrevProbeFactors[I.first] = I.second;
}
}
PseudoProbeManager::PseudoProbeManager(const Module &M) {
if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
for (const auto *Operand : FuncInfo->operands()) {
@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
Function *ProbeFn =
llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
Builder.getInt32(0)};
Builder.getInt32(0),
Builder.getInt64(PseudoProbeFullDistributionFactor)};
auto *Probe = Builder.CreateCall(ProbeFn, Args);
AssignDebugLoc(Probe);
}
@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
// Levarge the 32-bit discriminator field of debug data to store the ID and
// type of a callsite probe. This gets rid of the dependency on plumbing a
// customized metadata through the codegen pipeline.
uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
if (auto DIL = Call->getDebugLoc()) {
DIL = DIL->cloneWithDiscriminator(V);
Call->setDebugLoc(DIL);
@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,
return PreservedAnalyses::none();
}
// Recompute the distribution factor of every probe in F from block profile
// counts: a probe copy receives the share of its block's count in the total
// count of all blocks that hold a probe with the same id.
void PseudoProbeUpdatePass::runOnFunction(Function &F,
                                          FunctionAnalysisManager &FAM) {
  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);

  // Profile count of a block, or 0 when BFI has none for it.
  auto BBProfileCount = [&BFI](BasicBlock *BB) -> uint64_t {
    if (auto Count = BFI.getBlockProfileCount(BB))
      return Count.getValue();
    return 0;
  };

  // First sweep: total execution weight per probe id.
  ProbeFactorMap WeightSums;
  for (auto &Block : F) {
    const uint64_t BlockCount = BBProfileCount(&Block);
    for (auto &Inst : Block) {
      Optional<PseudoProbe> Probe = extractProbe(Inst);
      if (Probe)
        WeightSums[Probe->Id] += BlockCount;
    }
  }

  // Second sweep: fix up over-counted probes by giving each copy its share.
  // Probes whose total weight is zero keep their current factor.
  for (auto &Block : F) {
    const uint64_t BlockCount = BBProfileCount(&Block);
    for (auto &Inst : Block) {
      Optional<PseudoProbe> Probe = extractProbe(Inst);
      if (!Probe)
        continue;
      float Total = WeightSums[Probe->Id];
      if (Total != 0)
        setProbeDistributionFactor(Inst, BlockCount / Total);
    }
  }
}
// Pass entry point. Unless disabled through the UpdatePseudoProbe option,
// refresh the probe distribution factors of every function definition in M.
// No analyses are reported as preserved in either case.
PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  if (!UpdatePseudoProbe)
    return PreservedAnalyses::none();

  for (Function &Func : M) {
    if (!Func.isDeclaration()) {
      FunctionAnalysisManager &FAM =
          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
      runOnFunction(Func, FAM);
    }
  }
  return PreservedAnalyses::none();
}

View File

@ -0,0 +1,8 @@
foo:3200:13
1: 13
2: 7
3: 6
4: 13
5: 7
6: 6
!CFGChecksum: 844530426352218

View File

@ -11,14 +11,14 @@
; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ
define dso_local void @foo2() !dbg !7 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0
ret void, !dbg !10
}
define dso_local void @foo() #0 !dbg !11 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2
call void @foo2(), !dbg !12
@ -26,9 +26,9 @@ define dso_local void @foo() #0 !dbg !11 {
}
define dso_local i32 @entry() !dbg !14 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
@ -41,13 +41,13 @@ define dso_local i32 @entry() !dbg !14 {
; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe
;; with an index of 2.
; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
;; A discriminator of 186646551, which is 0xb200017 in hexadecimal, stands for a direct call probe
;; with an index of 2 and a scale of 100%.
; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551)
; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3, scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551)
; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1]], inlinedAt: ![[#INL2]])

View File

@ -11,32 +11,36 @@
;; Check the generation of pseudoprobe intrinsic call.
@a = dso_local global i32 0, align 4
;; Diamond-shaped control flow used to check pseudo probe emission: bb0 tests
;; %x against zero and branches to bb1 or bb2, each arm stores a different
;; constant to @a, and both arms rejoin in bb3, which returns.
define void @foo(i32 %x) !dbg !3 {
bb0:
%cmp = icmp eq i32 %x, 0
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0
br i1 %cmp, label %bb1, label %bb2
;; Arm taken when %x == 0: stores 6 to @a.
bb1:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
store i32 6, i32* @a, align 4
br label %bb3
;; Arm taken when %x != 0: stores 8 to @a.
bb2:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
store i32 8, i32* @a, align 4
br label %bb3
;; Join block; the return is the only instruction here that carries an
;; explicit !dbg attachment (!12).
bb3:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]]
ret void, !dbg !12
}
@ -44,7 +48,7 @@ declare void @bar(i32 %x)
define internal void @foo2(void (i32)* %f) !dbg !4 {
entry:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; Check pseudo_probe metadata attached to the indirect call instruction.
@ -64,13 +68,13 @@ entry:
; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
;; A discriminator of 67108887 which is 0x4000017 in hexadecimal, stands for an indirect call probe
;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for an indirect call probe
;; with an index of 2.
; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687)
; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
;; A discriminator of 134217759 which is 0x800001f in hexadecimal, stands for a direct call probe
;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe
;; with an index of 3.
; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559)
; Check the generation of .pseudo_probe_desc section
; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat

View File

@ -12,18 +12,18 @@ target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
entry:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
%add = add nsw i32 %x, 100000, !dbg !19
;; Check zen is fully inlined so there's no call to zen anymore.
;; Check code from the inlining of zen is properly annotated here.
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
; CHECK-NOT: call i32 @zen
%call = call i32 @zen(i32 %add), !dbg !20
ret i32 %call, !dbg !21
@ -32,36 +32,36 @@ entry:
; CHECK: define dso_local i32 @zen
;; Callee used by the inlining checks. Depending on the sign of %x it runs one
;; of two loops that repeatedly adjust the value by a volatile load of @factor
;; (subtracting while the value is > 0, or adding while it is < 0) and returns
;; the value that leaves the selected loop.
define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
entry:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
%cmp = icmp sgt i32 %x, 0, !dbg !26
br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
;; Header of the "positive" loop: keeps iterating while the running value > 0.
while.cond:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
%x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
%cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
br i1 %cmp1, label %while.body, label %if.end, !dbg !31
;; Body of the "positive" loop: subtracts the current @factor.
while.body:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
%0 = load volatile i32, i32* @factor, align 4, !dbg !32
%sub = sub nsw i32 %x.addr.0, %0, !dbg !39
br label %while.cond, !dbg !31
;; Header of the "non-positive" loop: keeps iterating while the running value < 0.
while.cond2:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
%x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
%cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
;; Body of the "non-positive" loop: adds the current @factor.
while.body4:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
%1 = load volatile i32, i32* @factor, align 4, !dbg !45
%add = add nsw i32 %x.addr.1, %1, !dbg !48
br label %while.cond2, !dbg !44
;; Common exit: selects the value from whichever loop was executed.
if.end:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
%x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
ret i32 %x.addr.2, !dbg !51
}
@ -109,6 +109,10 @@ if.end:
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;YAML: --- !Analysis
@ -121,6 +125,10 @@ if.end:
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;YAML: --- !Analysis
@ -133,6 +141,10 @@ if.end:
;YAML-NEXT: - NumSamples: '382920'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '2'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '382920'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...

View File

@ -8,26 +8,26 @@ entry:
store i32 %x, i32* %x.addr, align 4
%0 = load i32, i32* %x.addr, align 4
%cmp = icmp eq i32 %0, 0
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
br i1 %cmp, label %if.then, label %if.else
; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]]
if.then:
; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]]
call void %f(i32 1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
store i32 1, i32* %retval, align 4
br label %return
if.else:
; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
call void %f(i32 2)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
store i32 2, i32* %retval, align 4
br label %return
return:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
%1 = load i32, i32* %retval, align 4
ret i32 %1
}
@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
;; A discriminator of 67108911 which is 0x400002f in hexadecimal, stands for an indirect call probe
;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe
;; with an index of 5.
; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911)
; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
;; A discriminator of 67108919 which is 0x4000037 in hexadecimal, stands for an indirect call probe
;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe
;; with an index of 6.
; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919)
; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
!llvm.module.flags = !{!9, !10}
@ -69,6 +69,10 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '13'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
@ -80,6 +84,10 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '5'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '7'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
@ -91,6 +99,10 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '2'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '7'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
@ -102,6 +114,10 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '6'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '6'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
@ -113,6 +129,10 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '3'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '6'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
@ -124,4 +144,8 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '4'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '13'
;YAML-NEXT: - String: ')'

View File

@ -0,0 +1,45 @@
; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s
declare i32 @f1()
declare i32 @f2()
declare void @f3()
;; This tests that the branch in 'merge' can be cloned up into T1.
;; Entry branches on %cond to T1 or F1; both arms call a helper and fall into
;; Merge, whose branch condition %A is a phi of %cond3 (from T1) and %cond2
;; (from F1). The expected output places a copy of probe 4 in both T1 and F1,
;; each with a distribution factor that is a fraction of the saturated value -1.
define i32 @foo(i1 %cond, i1 %cond2) #0 {
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
br i1 %cond, label %T1, label %F1
T1:
; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
%v1 = call i32 @f1()
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
%cond3 = icmp eq i32 %v1, 412
br label %Merge
F1:
; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
%v2 = call i32 @f2()
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
br label %Merge
;; Join block whose conditional branch depends on which predecessor was taken.
Merge:
%A = phi i1 [%cond3, %T1], [%cond2, %F1]
%B = phi i32 [%v1, %T1], [%v2, %F1]
br i1 %A, label %T2, label %F2
T2:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
call void @f3()
ret i32 %B
F2:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
ret i32 %B
}
; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
attributes #0 = {"use-sample-profile"}

View File

@ -0,0 +1,77 @@
; REQUIRES: x86_64-linux
; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
; RUN: FileCheck %s < %t
; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
; VERIFY: Function foo:
; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00
; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00
declare void @foo2() nounwind
;; Two-level loop nest. The inner block bb10 loops on itself until
;; %indvars.iv.next reaches 5, calling @foo2 once per iteration, and carries
;; the !llvm.loop !13 attachment. The outer loop re-enters bb7.preheader until
;; the value leaving the inner loop is zero.
define void @foo(i32 %x) {
bb:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
%tmp = alloca [5 x i32*], align 16
br label %bb7.preheader
;; Outer-loop latch: exits to bb24 once the inner loop's result is zero.
bb3.loopexit:
%spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
%tmp5.not = icmp eq i32 %spec.select.lcssa, 0
br i1 %tmp5.not, label %bb24, label %bb7.preheader
bb7.preheader:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
%tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
br label %bb10
;; Inner loop body. The expected output below lists five copies of probe 4,
;; each followed by a call to @foo2 that shares the same debug location.
bb10:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
%indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
%tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
%tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
%tmp14 = load i32*, i32** %tmp13, align 8
%tmp15.not = icmp ne i32* %tmp14, null
%tmp18 = sext i1 %tmp15.not to i32
%spec.select = add nsw i32 %tmp1.14, %tmp18
call void @foo2(), !dbg !12
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 5
br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
bb24:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
ret void
}
;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe
;; with an index of 6 and a scale of 100%.
; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.9.0"}
!12 = !DILocation(line: 2, column: 20, scope: !4)
!13 = distinct !{!13, !14}
!14 = !{!"llvm.loop.unroll.full"}