[SampleFDO] Flow Sensitive Sample FDO (FSAFDO) profile loader

This patch implements the Flow Sensitive Sample FDO (FSAFDO) profile
loader. There are two profile loaders for the FS profile:
one before RegAlloc and one before BlockPlacement.

To enable it, when -fprofile-sample-use=<profile> is specified,
add "-enable-fs-discriminator=true \
     -disable-ra-fsprofile-loader=false \
     -disable-layout-fsprofile-loader=false"
to turn on the FS profile loaders.

Differential Revision: https://reviews.llvm.org/D107878
This commit is contained in:
Rong Xu 2021-08-18 16:59:02 -07:00
parent c777e51468
commit 5fdaaf7fd8
19 changed files with 680 additions and 74 deletions

View File

@ -1261,6 +1261,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
"", PGOOptions::NoAction, PGOOptions::CSIRInstr,
CodeGenOpts.DebugInfoForProfiling);
}
if (TM)
TM->setPGOOption(PGOOpt);
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;

View File

@ -0,0 +1,81 @@
//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the supporting functions for machine level Sample FDO
// loader. This is used in Flow Sensitive SampleFDO.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include <cassert>
namespace llvm {
using namespace sampleprof;
class MIRProfileLoader;
/// Machine function pass that reads a flow sensitive sample profile through a
/// MIRProfileLoader instance.
class MIRProfileLoaderPass : public MachineFunctionPass {
  // Not assigned by any code in this header; null-initialised so that
  // getMachineFunction() returns a well-defined value.
  MachineFunction *MF = nullptr;
  std::string ProfileFileName;
  FSDiscriminatorPass P;
  // Discriminator bit range for pass P; both are computed in the constructor.
  unsigned LowBit;
  unsigned HighBit;

public:
  static char ID;

  /// \param FileName name of the profile file handed to the loader.
  /// \param RemappingFileName name of the profile remapping file.
  /// \param P FS discriminator pass whose bit range
  ///        [getFSPassBitBegin(P), getFSPassBitEnd(P)] this instance uses.
  //
  // NOTE(review): MIRProfileLoader is only forward-declared at this point, and
  // std::make_unique needs the complete type when it is instantiated --
  // confirm every translation unit including this header provides it.
  MIRProfileLoaderPass(std::string FileName = "",
                       std::string RemappingFileName = "",
                       FSDiscriminatorPass P = FSDiscriminatorPass::Pass1)
      : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
        MIRSampleLoader(
            std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
    LowBit = getFSPassBitBegin(P);
    HighBit = getFSPassBitEnd(P);
    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
  }

  /// getMachineFunction - Return the stored machine function pointer, or
  /// nullptr if none has been recorded.
  const MachineFunction *getMachineFunction() const { return MF; }

private:
  void init(MachineFunction &MF);
  bool runOnMachineFunction(MachineFunction &) override;
  bool doInitialization(Module &M) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// The loader that does the actual profile reading and weight propagation.
  std::unique_ptr<MIRProfileLoader> MIRSampleLoader;
  /// Hold the information of the basic block frequency.
  MachineBlockFrequencyInfo *MBFI = nullptr;
};
} // namespace llvm
#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H

View File

@ -112,6 +112,12 @@ public:
return DT->dominates(A, B);
}
/// Fill \p Result with the descendants of \p A as reported by the wrapped
/// dominator tree. applySplitCriticalEdges() is called before the tree is
/// queried, as in the neighbouring dominates() overloads.
void getDescendants(MachineBasicBlock *A,
SmallVectorImpl<MachineBasicBlock *> &Result) {
applySplitCriticalEdges();
DT->getDescendants(A, Result);
}
bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);

View File

@ -118,6 +118,12 @@ public:
: DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
PassName, RemarkName, Loc, MBB) {}
/// Construct an analysis remark attached to the instruction \p MI: the debug
/// location is taken from MI->getDebugLoc() and the block from
/// MI->getParent(). \p MI is dereferenced unconditionally, so it must not be
/// null.
MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
const MachineInstr *MI)
: DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
PassName, RemarkName, MI->getDebugLoc(),
MI->getParent()) {}
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_MachineOptimizationRemarkAnalysis;
}

View File

@ -171,6 +171,9 @@ namespace llvm {
/// This pass adds flow sensitive discriminators.
extern char &MIRAddFSDiscriminatorsID;
/// This pass reads flow sensitive profile.
extern char &MIRProfileLoaderPassID;
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
@ -513,6 +516,11 @@ namespace llvm {
FunctionPass *
createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
/// Read Flow Sensitive Profile.
FunctionPass *createMIRProfileLoaderPass(std::string File,
std::string RemappingFile,
sampleprof::FSDiscriminatorPass P);
/// Creates MIR Debugify pass. \see MachineDebugify.cpp
ModulePass *createDebugifyMachineModulePass();

View File

@ -2212,7 +2212,8 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
Optional<const DILocation *>
DILocation::cloneWithBaseDiscriminator(unsigned D) const {
unsigned BD, DF, CI;
if (EnableFSDiscriminator) {
@ -2230,7 +2231,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
return None;
}
Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
Optional<const DILocation *>
DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
DF *= getDuplicationFactor();

View File

@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
void initializeAAResultsWrapperPassPass(PassRegistry&);
void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
void initializeAddFSDiscriminatorsPass(PassRegistry &);
void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHardwareLoopsPass(PassRegistry&);
void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeMemProfilerLegacyPassPass(PassRegistry &);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);

View File

@ -20,6 +20,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/PGOOptions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Instrumentation.h"
@ -32,49 +33,6 @@ class AAManager;
class TargetMachine;
class ModuleSummaryIndex;
/// A struct capturing PGO tunables.
///
/// The constructor checks (in asserts-enabled builds) that the requested
/// combination of actions is consistent. Note that the all-defaults
/// combination (NoAction, NoCSAction, both profiling flags false) does not
/// satisfy the last assertion below.
struct PGOOptions {
// Primary profile action.
enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
// Context-sensitive profile action.
enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
// The parameters shadow the members of the same name: inside the initializer
// list the bare names refer to the parameters, and the body uses this-> to
// reach the members.
PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
CSPGOAction CSAction = NoCSAction,
bool DebugInfoForProfiling = false,
bool PseudoProbeForProfiling = false)
: ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
ProfileRemappingFile(ProfileRemappingFile), Action(Action),
// DebugInfoForProfiling is forced on for SampleUse unless pseudo probes are
// requested.
CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
(Action == SampleUse &&
!PseudoProbeForProfiling)),
PseudoProbeForProfiling(PseudoProbeForProfiling) {
// Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
// callback with IRUse action without ProfileFile.
// If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
assert(this->CSAction == NoCSAction ||
(this->Action != IRInstr && this->Action != SampleUse));
// For CSIRInstr, CSProfileGenFile also needs to be nonempty.
assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
// If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
// a profile.
assert(this->CSAction != CSIRUse || this->Action == IRUse);
// If neither Action nor CSAction, DebugInfoForProfiling or
// PseudoProbeForProfiling needs to be true.
assert(this->Action != NoAction || this->CSAction != NoCSAction ||
this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
}
// Profile file name.
std::string ProfileFile;
// Required to be non-empty when CSAction is CSIRInstr (asserted above).
std::string CSProfileGenFile;
// Profile remapping file name.
std::string ProfileRemappingFile;
PGOAction Action;
CSPGOAction CSAction;
bool DebugInfoForProfiling;
bool PseudoProbeForProfiling;
};
/// Tunable parameters for passes in the default pipelines.
class PipelineTuningOptions {
public:

View File

@ -0,0 +1,65 @@
//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Define option tunables for PGO.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_PGOOPTIONS_H
#define LLVM_SUPPORT_PGOOPTIONS_H
#include "llvm/Support/Error.h"
#include <cassert>
#include <string>
#include <utility>
namespace llvm {
/// A struct capturing PGO tunables.
///
/// The constructor checks (in asserts-enabled builds) that the requested
/// combination of actions is consistent. Note that the all-defaults
/// combination (NoAction, NoCSAction, both profiling flags false) does not
/// satisfy the last assertion below.
struct PGOOptions {
  /// Primary profile action.
  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
  /// Context-sensitive profile action.
  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };

  /// The string arguments are taken by value and moved into the members, so
  /// callers passing temporaries pay for no extra copy.
  ///
  /// The parameters shadow the members of the same name: inside the
  /// initializer list the bare names refer to the parameters, and the body
  /// uses this-> to reach the (already initialised) members. That is also why
  /// moving from the parameters is safe here.
  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
             CSPGOAction CSAction = NoCSAction,
             bool DebugInfoForProfiling = false,
             bool PseudoProbeForProfiling = false)
      : ProfileFile(std::move(ProfileFile)),
        CSProfileGenFile(std::move(CSProfileGenFile)),
        ProfileRemappingFile(std::move(ProfileRemappingFile)), Action(Action),
        CSAction(CSAction),
        // Sample profile use implies debug info for profiling unless pseudo
        // probes were requested instead.
        DebugInfoForProfiling(DebugInfoForProfiling ||
                              (Action == SampleUse &&
                               !PseudoProbeForProfiling)),
        PseudoProbeForProfiling(PseudoProbeForProfiling) {
    // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
    // callback with IRUse action without ProfileFile.

    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
    assert(this->CSAction == NoCSAction ||
           (this->Action != IRInstr && this->Action != SampleUse));

    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());

    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
    // a profile.
    assert(this->CSAction != CSIRUse || this->Action == IRUse);

    // If neither Action nor CSAction, DebugInfoForProfiling or
    // PseudoProbeForProfiling needs to be true.
    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
           this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
  }

  /// Profile file name.
  std::string ProfileFile;
  /// Required to be non-empty when CSAction is CSIRInstr (asserted above).
  std::string CSProfileGenFile;
  /// Profile remapping file name.
  std::string ProfileRemappingFile;
  PGOAction Action;
  CSPGOAction CSAction;
  bool DebugInfoForProfiling;
  bool PseudoProbeForProfiling;
};
} // namespace llvm
#endif

View File

@ -13,6 +13,7 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@ -20,6 +21,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/PGOOptions.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
@ -110,6 +112,9 @@ protected: // Can only create subclasses.
unsigned RequireStructuredCFG : 1;
unsigned O0WantsFastISel : 1;
// PGO related tunables.
Optional<PGOOptions> PGOOption = None;
public:
const TargetOptions DefaultOptions;
mutable TargetOptions Options;
@ -303,6 +308,9 @@ public:
return false;
}
/// Store a copy of \p PGOOpt as this target machine's PGO option.
void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; }
/// Return the stored PGO option by const reference; the member is
/// initialised to None.
const Optional<PGOOptions> &getPGOOption() const { return PGOOption; }
/// If the specified generic pointer could be assumed as a pointer to a
/// specific address space, return that address space.
///

View File

@ -56,15 +56,20 @@ template <> struct IRTraits<BasicBlock> {
using FunctionT = Function;
using BlockFrequencyInfoT = BlockFrequencyInfo;
using LoopT = Loop;
using LoopInfoT = LoopInfo;
using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
using PostDominatorTreeT = PostDominatorTree;
using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
using OptRemarkEmitterT = OptimizationRemarkEmitter;
using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
using DominatorTreeT = DominatorTree;
using PostDominatorTreeT = PostDominatorTree;
using PredRangeT = pred_range;
using SuccRangeT = succ_range;
// At the IR level the function is its own IR function; return it unchanged.
static Function &getFunction(Function &F) { return F; }
// Return the entry basic block of F.
static const BasicBlock *getEntryBB(const Function *F) {
return &F->getEntryBlock();
}
// Predecessor / successor ranges of BB, forwarded to predecessors() and
// successors().
static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
};
} // end namespace afdo_detail
@ -76,7 +81,8 @@ extern cl::opt<bool> NoWarnSampleUnused;
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {}
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
: Filename(Name), RemappingFilename(RemapName) {}
void dump() { Reader->dump(); }
using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
@ -85,14 +91,19 @@ public:
typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT;
using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
using DominatorTreePtrT =
typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
using PostDominatorTreePtrT =
typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
using PostDominatorTreeT =
typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
using OptRemarkEmitterT =
typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
using OptRemarkAnalysisT =
typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT;
using PostDominatorTreeT =
typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
using EquivalenceClassMap =
@ -112,6 +123,12 @@ protected:
const BasicBlockT *getEntryBB(const FunctionT *F) {
return afdo_detail::IRTraits<BT>::getEntryBB(F);
}
// Return the predecessors of BB by forwarding to the IRTraits<BT>
// specialization for this block type.
PredRangeT getPredecessors(BasicBlockT *BB) {
return afdo_detail::IRTraits<BT>::getPredecessors(BB);
}
// Return the successors of BB by forwarding to the IRTraits<BT>
// specialization for this block type.
SuccRangeT getSuccessors(BasicBlockT *BB) {
return afdo_detail::IRTraits<BT>::getSuccessors(BB);
}
unsigned getFunctionLoc(FunctionT &Func);
virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
@ -129,12 +146,11 @@ protected:
void findEquivalencesFor(BasicBlockT *BB1,
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
void propagateWeights(FunctionT &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
void clearFunctionData();
void clearFunctionData(bool ResetDT = true);
void computeDominanceAndLoopInfo(FunctionT &F);
bool
computeAndPropagateWeights(FunctionT &F,
@ -168,9 +184,9 @@ protected:
EquivalenceClassMap EquivalenceClass;
/// Dominance, post-dominance and loop information.
std::unique_ptr<DominatorTreeT> DT;
std::unique_ptr<PostDominatorTreeT> PDT;
std::unique_ptr<LoopInfoT> LI;
DominatorTreePtrT DT;
PostDominatorTreePtrT PDT;
LoopInfoPtrT LI;
/// Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@ -190,6 +206,9 @@ protected:
/// Name of the profile file to load.
std::string Filename;
/// Name of the profile remapping file to load.
std::string RemappingFilename;
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@ -199,15 +218,17 @@ protected:
/// Clear all the per-function data used to load samples and propagate weights.
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() {
void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
BlockWeights.clear();
EdgeWeights.clear();
VisitedBlocks.clear();
VisitedEdges.clear();
EquivalenceClass.clear();
DT = nullptr;
PDT = nullptr;
LI = nullptr;
if (ResetDT) {
DT = nullptr;
PDT = nullptr;
LI = nullptr;
}
Predecessors.clear();
Successors.clear();
CoverageTracker.clear();
@ -475,7 +496,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
// class by making BB2's equivalence class be BB1.
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
findEquivalencesFor(BB1, DominatedBBs, PDT.get());
findEquivalencesFor(BB1, DominatedBBs, &*PDT);
LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
}
@ -692,7 +713,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
SmallPtrSet<BasicBlockT *, 16> Visited;
if (!Predecessors[B1].empty())
llvm_unreachable("Found a stale predecessors list in a basic block.");
for (BasicBlockT *B2 : predecessors(B1))
for (auto *B2 : getPredecessors(B1))
if (Visited.insert(B2).second)
Predecessors[B1].push_back(B2);
@ -700,7 +721,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
Visited.clear();
if (!Successors[B1].empty())
llvm_unreachable("Found a stale successors list in a basic block.");
for (BasicBlockT *B2 : successors(B1))
for (auto *B2 : getSuccessors(B1))
if (Visited.insert(B2).second)
Successors[B1].push_back(B2);
}
@ -911,12 +932,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
FunctionT &F) {
DT.reset(new DominatorTreeT);
DT.reset(new DominatorTree);
DT->recalculate(F);
PDT.reset(new PostDominatorTree(F));
LI.reset(new LoopInfoT);
LI.reset(new LoopInfo);
LI->analyze(*DT);
}

View File

@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
MachineTraceMetrics.cpp
MachineVerifier.cpp
MIRFSDiscriminator.cpp
MIRSampleProfile.cpp
MIRYamlMapping.cpp
ModuloSchedule.cpp
MultiHazardRecognizer.cpp

View File

@ -0,0 +1,335 @@
//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the implementation of the MIRSampleProfile loader, mainly
// for flow sensitive SampleFDO.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRSampleProfile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
using namespace llvm;
using namespace sampleprof;
using namespace llvm::sampleprofutil;
using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "fs-profile-loader"
static cl::opt<bool> ShowFSBranchProb(
"show-fs-branchprob", cl::Hidden, cl::init(false),
cl::desc("Print setting flow sensitive branch probabilities"));
// Percentage threshold used by the -show-fs-branchprob debug output: the
// absolute difference between the old and the new branch probability is
// compared against this value.
static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
    "fs-profile-debug-prob-diff-threshold", cl::init(10),
    cl::desc("Only show debug message if the branch probability is greater "
             "than this value (in percentage)."));
// Minimum (unscaled) source block weight for which the -show-fs-branchprob
// debug output prints a branch probability update.
static cl::opt<unsigned> FSProfileDebugBWThreshold(
    "fs-profile-debug-bw-threshold", cl::init(10000),
    cl::desc("Only show debug message if the source branch weight is greater "
             "than this value."));
static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
cl::init(false),
cl::desc("View BFI before MIR loader"));
static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
"Load MIR Sample Profile",
/* cfg = */ false, /* is_analysis = */ false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
/* cfg = */ false, /* is_analysis = */ false)
char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
/// Factory for the FS profile loader pass. Returns a newly allocated
/// MIRProfileLoaderPass constructed from the profile file name, the remapping
/// file name and the FS discriminator pass \p P.
FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
std::string RemappingFile,
FSDiscriminatorPass P) {
return new MIRProfileLoaderPass(File, RemappingFile, P);
}
namespace llvm {
// Internal option used to control BFI display only after MBP pass.
// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
// -view-block-layout-with-bfi={none | fraction | integer | count}
extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
// Command line option to specify the name of the function for CFG dump
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
namespace afdo_detail {
/// IRTraits specialization that maps the generic type names used by
/// SampleProfileLoaderBaseImpl onto their Machine IR counterparts.
template <> struct IRTraits<MachineBasicBlock> {
using InstructionT = MachineInstr;
using BasicBlockT = MachineBasicBlock;
using FunctionT = MachineFunction;
using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
using LoopT = MachineLoop;
// For Machine IR the loop, dominator and post-dominator "pointer" types are
// plain raw pointers.
using LoopInfoPtrT = MachineLoopInfo *;
using DominatorTreePtrT = MachineDominatorTree *;
using PostDominatorTreePtrT = MachinePostDominatorTree *;
using PostDominatorTreeT = MachinePostDominatorTree;
using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
// Return the IR function that the machine function F was built from.
static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
// Return the entry block of F as reported by GraphTraits.
static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
return GraphTraits<const MachineFunction *>::getEntryNode(F);
}
// Predecessor range of BB.
static PredRangeT getPredecessors(MachineBasicBlock *BB) {
return BB->predecessors();
}
// Successor range of BB.
static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
return BB->successors();
}
};
} // namespace afdo_detail
/// Sample profile loader working on Machine IR. It reuses the generic
/// SampleProfileLoaderBaseImpl machinery and turns the propagated weights into
/// successor branch probabilities.
class MIRProfileLoader final
    : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
public:
  /// Install the analyses used while processing a function. The loader only
  /// stores the pointers it is given.
  void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
                   MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
                   MachineOptimizationRemarkEmitter *MORE) {
    DT = MDT;
    PDT = MPDT;
    LI = MLI;
    BFI = MBFI;
    ORE = MORE;
  }

  /// Select the FS discriminator pass and derive its discriminator bit range.
  void setFSPass(FSDiscriminatorPass Pass) {
    P = Pass;
    LowBit = getFSPassBitBegin(P);
    HighBit = getFSPassBitEnd(P);
    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
  }

  MIRProfileLoader(StringRef Name, StringRef RemapName)
      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
  }

  void setBranchProbs(MachineFunction &F);
  bool runOnFunction(MachineFunction &F);
  bool doInitialization(Module &M);
  /// Whether the profile is considered usable (see ProfileIsValid).
  bool isValid() const { return ProfileIsValid; }

protected:
  friend class SampleCoverageTracker;

  /// Hold the information of the basic block frequency. Null until
  /// setInitVals() is called.
  MachineBlockFrequencyInfo *BFI = nullptr;

  /// The FS discriminator pass this loader instance works for. Pass1 is only
  /// a defined placeholder; setFSPass() installs the real value together with
  /// the matching LowBit/HighBit.
  FSDiscriminatorPass P = FSDiscriminatorPass::Pass1;

  // LowBit in the FS discriminator used by this instance. Note the number is
  // 0-based. Base discriminator use bit 0 to bit 11.
  unsigned LowBit = 0;
  // HighBit in the FS discriminator used by this instance. Note the number
  // is 0-based.
  unsigned HighBit = 0;

  /// Result of reading the profile in doInitialization(); starts out true.
  bool ProfileIsValid = true;
};
// Specialization for Machine IR: intentionally empty. For MIRProfileLoader the
// dominator tree, post-dominator tree and loop info pointers are assigned in
// setInitVals() rather than computed here.
template <>
void SampleProfileLoaderBaseImpl<
MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
/// Translate the propagated block and edge weights of \p F into successor
/// branch probabilities.
///
/// Blocks with fewer than two successors are skipped, as are blocks whose
/// outgoing edge weights sum to zero. The sum of the outgoing edge weights is
/// used as the denominator; when it does not fit in 32 bits, the denominator
/// and every edge weight are divided by a common factor first.
void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
  for (auto &BI : F) {
    MachineBasicBlock *BB = &BI;
    if (BB->succ_size() < 2)
      continue;
    const MachineBasicBlock *EC = EquivalenceClass[BB];
    uint64_t BBWeight = BlockWeights[EC];
    uint64_t SumEdgeWeight = 0;
    for (MachineBasicBlock *Succ : BB->successors()) {
      Edge E = std::make_pair(BB, Succ);
      SumEdgeWeight += EdgeWeights[E];
    }

    // The edge weights are authoritative: if they disagree with the block
    // weight, use their sum as the denominator.
    if (BBWeight != SumEdgeWeight) {
      LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
                        << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
                        << "\n");
      BBWeight = SumEdgeWeight;
    }
    if (BBWeight == 0) {
      LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
      continue;
    }

#ifndef NDEBUG
    uint64_t BBWeightOrig = BBWeight;
#endif
    // The numerator and denominator are narrowed to 32 bits when the
    // BranchProbability is built below, so scale everything down when the
    // denominator is too large. Factor is kept 64 bits wide:
    // BBWeight / MaxWeight + 1 can itself exceed 32 bits for weights close to
    // UINT64_MAX, and a truncated factor would leave BBWeight above MaxWeight.
    const uint64_t MaxWeight = std::numeric_limits<uint32_t>::max();
    uint64_t Factor = 1;
    if (BBWeight > MaxWeight) {
      Factor = BBWeight / MaxWeight + 1;
      BBWeight /= Factor;
      LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
    }

    // The successor iterator itself is needed for getEdgeProbability() and
    // setSuccProbability(), hence no range-for here.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
                                          SE = BB->succ_end();
         SI != SE; ++SI) {
      MachineBasicBlock *Succ = *SI;
      Edge E = std::make_pair(BB, Succ);
      uint64_t EdgeWeight = EdgeWeights[E];
      EdgeWeight /= Factor;

      assert(BBWeight >= EdgeWeight &&
             "BBweight is smaller than EdgeWeight -- should not happen.\n");

      BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
      BranchProbability NewProb(EdgeWeight, BBWeight);
      if (OldProb == NewProb)
        continue;
      BB->setSuccProbability(SI, NewProb);
#ifndef NDEBUG
      if (!ShowFSBranchProb)
        continue;
      // Only report updates that change the probability by at least the
      // configured percentage on a block that is heavy enough.
      bool Show = false;
      BranchProbability Diff;
      if (OldProb > NewProb)
        Diff = OldProb - NewProb;
      else
        Diff = NewProb - OldProb;
      Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
      Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);

      auto DIL = BB->findBranchDebugLoc();
      auto SuccDIL = Succ->findBranchDebugLoc();
      if (Show) {
        dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
               << Succ->getNumber() << "): ";
        if (DIL)
          dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
                 << DIL->getColumn();
        if (SuccDIL)
          dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
                 << ":" << SuccDIL->getColumn();
        dbgs() << " W=" << BBWeightOrig << "  " << OldProb << " --> " << NewProb
               << "\n";
      }
#endif
    }
  }
}
/// Create the sample profile reader for Filename / RemappingFilename and read
/// the profile.
///
/// \returns false if the reader could not be created (a diagnostic is emitted
/// and the loader is marked invalid), true otherwise. When true is returned,
/// isValid() reports whether reading the profile succeeded.
bool MIRProfileLoader::doInitialization(Module &M) {
  auto &Ctx = M.getContext();

  auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
                                                             RemappingFilename);
  if (std::error_code EC = ReaderOrErr.getError()) {
    std::string Msg = "Could not open profile: " + EC.message();
    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
    // There is no reader. ProfileIsValid starts out true, so it must be
    // cleared here; otherwise callers that check isValid() would go on to
    // runOnFunction(), which dereferences Reader.
    ProfileIsValid = false;
    return false;
  }

  Reader = std::move(ReaderOrErr.get());
  Reader->setModule(&M);
  ProfileIsValid = (Reader->read() == sampleprof_error::success);
  // NOTE(review): the result of getSummary() is discarded; confirm whether
  // this call is needed for a side effect or can be dropped.
  Reader->getSummary();

  return true;
}
/// Apply the profile to one machine function.
///
/// \returns false without touching any probability when the function has no
/// (or empty) samples or getFunctionLoc() yields 0; otherwise the result of
/// computeAndPropagateWeights().
bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
  // Drop the per-function state of the previous run, but keep the dominator,
  // post-dominator and loop info pointers (ResetDT == false).
  clearFunctionData(/*ResetDT=*/false);

  Function &IRFunc = MF.getFunction();
  Samples = Reader->getSamplesFor(IRFunc);
  const bool HasSamples = Samples && !Samples->empty();
  if (!HasSamples || getFunctionLoc(MF) == 0)
    return false;

  DenseSet<GlobalValue::GUID> InlinedGUIDs;
  const bool WeightsChanged = computeAndPropagateWeights(MF, InlinedGUIDs);

  // Set the new BPI, BFI.
  setBranchProbs(MF);

  return WeightsChanged;
}
} // namespace llvm
/// Per-function driver: fetch the required analyses, hand them to the loader
/// and let it update \p MF. Optionally shows the block frequency graph before
/// and/or after loading. Does nothing when the loader reports an invalid
/// profile.
bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
  if (!MIRSampleLoader->isValid())
    return false;

  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
                    << MF.getFunction().getName() << "\n");

  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  MachineDominatorTree *DomTree = &getAnalysis<MachineDominatorTree>();
  MachinePostDominatorTree *PostDomTree =
      &getAnalysis<MachinePostDominatorTree>();
  MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfo>();
  MachineOptimizationRemarkEmitter *Emitter =
      &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  MIRSampleLoader->setInitVals(DomTree, PostDomTree, Loops, MBFI, Emitter);

  MF.RenumberBlocks();

  // A BFI view is shown only if it was requested, graph viewing is enabled and
  // the function matches the (optional) name filter.
  auto WantsBFIView = [&MF](bool Requested) {
    if (!Requested || ViewBlockLayoutWithBFI == GVDT_None)
      return false;
    return ViewBlockFreqFuncName.empty() ||
           MF.getFunction().getName().equals(ViewBlockFreqFuncName);
  };

  if (WantsBFIView(ViewBFIBefore))
    MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);

  const bool Changed = MIRSampleLoader->runOnFunction(MF);

  if (WantsBFIView(ViewBFIAfter))
    MBFI->view("MIR_prof_loader_a." + MF.getName(), false);

  return Changed;
}
/// Module-level setup: pass the configured FS discriminator pass to the loader
/// and let the loader create its profile reader. Returns the loader's result.
bool MIRProfileLoaderPass::doInitialization(Module &M) {
LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
<< "\n");
// setFSPass() has to come first: the loader's doInitialization() hands its P
// to the profile reader.
MIRSampleLoader->setFSPass(P);
return MIRSampleLoader->doInitialization(M);
}
/// Declare the analyses fetched in runOnMachineFunction(): block frequency
/// info, dominator and post-dominator trees, loop info and the remark emitter.
void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
// NOTE(review): the loader rewrites successor probabilities in
// setBranchProbs(); confirm that declaring all analyses preserved is intended.
AU.setPreservesAll();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequiredTransitive<MachineLoopInfo>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}

View File

@ -172,6 +172,24 @@ static cl::opt<bool>
FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
cl::desc("Do not insert FS-AFDO discriminators before "
"emit."));
// Disable MIRProfileLoader before RegAlloc. This is for debugging and
// tuning purpose.
static cl::opt<bool> DisableRAFSProfileLoader(
"disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
cl::desc("Disable MIRProfileLoader before RegAlloc"));
// Disable MIRProfileLoader before BlockPlacement. This is for debugging
// and tuning purpose.
static cl::opt<bool> DisableLayoutFSProfileLoader(
"disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
cl::desc("Disable MIRProfileLoader before BlockPlacement"));
// Specify FSProfile file name.
static cl::opt<std::string>
FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Flow Sensitive profile file name."), cl::Hidden);
// Specify Remapping file for FSProfile.
static cl::opt<std::string> FSRemappingFile(
"fs-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
return TargetID;
}
// Find the FSProfile file name. The internal option (-fs-profile-file) takes
// precedence; otherwise the profile file of the TargetMachine's PGO option is
// used, but only when that option requests sample profile use. Returns an
// empty string when neither source provides a name.
//
// The result is returned as a plain (non-const) std::string: a const-qualified
// by-value return would prevent callers from moving out of it.
static std::string getFSProfileFile(const TargetMachine *TM) {
  if (!FSProfileFile.empty())
    return FSProfileFile.getValue();
  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
    return std::string();
  return PGOOpt->ProfileFile;
}
// Find the Profile remapping file name. The internal option
// (-fs-remapping-file) takes precedence; otherwise the remapping file of the
// TargetMachine's PGO option is used, but only when that option requests
// sample profile use. Returns an empty string when neither source provides a
// name.
//
// The result is returned as a plain (non-const) std::string: a const-qualified
// by-value return would prevent callers from moving out of it.
static std::string getFSRemappingFile(const TargetMachine *TM) {
  if (!FSRemappingFile.empty())
    return FSRemappingFile.getValue();
  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
    return std::string();
  return PGOOpt->ProfileRemappingFile;
}
//===---------------------------------------------------------------------===//
/// TargetPassConfig
//===---------------------------------------------------------------------===//
@ -1115,9 +1155,15 @@ void TargetPassConfig::addMachinePasses() {
// Add a FSDiscriminator pass right before RA, so that we could get
// more precise SampleFDO profile for RA.
if (EnableFSDiscriminator)
if (EnableFSDiscriminator) {
addPass(createMIRAddFSDiscriminatorsPass(
sampleprof::FSDiscriminatorPass::Pass1));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
addPass(
createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
sampleprof::FSDiscriminatorPass::Pass1));
}
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
@ -1471,9 +1517,15 @@ bool TargetPassConfig::addGCPasses() {
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
if (EnableFSDiscriminator)
if (EnableFSDiscriminator) {
addPass(createMIRAddFSDiscriminatorsPass(
sampleprof::FSDiscriminatorPass::Pass2));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
addPass(
createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
sampleprof::FSDiscriminatorPass::Pass2));
}
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)

View File

@ -230,6 +230,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, true);
}
if (TM)
TM->setPGOOption(PGOOpt);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;

View File

@ -358,10 +358,10 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
: SampleProfileLoaderBaseImpl(std::string(Name)),
: SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
LTOPhase(LTOPhase) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@ -417,9 +417,6 @@ protected:
/// Profile tracker for different context.
std::unique_ptr<SampleContextTracker> ContextTracker;
/// Name of the profile remapping file to load.
std::string RemappingFilename;
/// Flag indicating whether input profile is context-sensitive
bool ProfileIsCS = false;

View File

@ -0,0 +1,35 @@
work:42380966:1346190
1: 1246499
5: 1246499
foo:28798256:4267
0: 4267
2.1: 255999
4: 264627 bar:250018
4.512: 269485 bar:278102
4.4608: 280297 bar:280933
4.12288: 278916 bar:267752
5: 264627
5.4096: 269485
5.8192: 260670
5.8704: 278916
6: 11541
6.3584: 278916 work:284547
6.4096: 260670 work:249428
6.8704: 11541
7: 272442
7.512: 283590
7.4608: 234082
7.9728: 279149
8: 11541
8.11776: 283590 work:305061
8.12288: 279149 work:281368
8.13824: 234082 work:225786
10: 4050
bar:9504180:1076805
2: 1056020
3: 1056020
main:20360:0
0: 0
2.1: 4045
3: 4156 foo:4267
5: 0

View File

@ -1,4 +1,7 @@
; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo
; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER
;
;;
;; C source code for the test (compiled at -O3):
;; // A test case for loop unroll.
@ -50,6 +53,25 @@
; CHECK: .byte 1
; CHECK: .size __llvm_fs_discriminator__, 1
;; Check that new branch probs are generated.
; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x7aca7894 / 0x80000000 = 95.93%
; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x0535876c / 0x80000000 = 4.07%
; LOADER: Set branch fs prob: MBB (5 -> 8): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x021c112e / 0x80000000 = 1.65%
; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35%
; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00%
; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00%
; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57%
; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43%
; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
; LOADER: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46%
; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54%
target triple = "x86_64-unknown-linux-gnu"
@sum = dso_local local_unnamed_addr global i32 0, align 4

View File

@ -284,6 +284,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
P->CSAction = PGOOptions::CSIRUse;
}
}
if (TM)
TM->setPGOOption(P);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;