[LPM] Port CGProfilePass from NPM to LPM

Reviewers: hans, chandlerc!, asbirlea, nikic

Reviewed By: hans, nikic

Subscribers: steven_wu, dexonsmith, nikic, echristo, void, zhizhouy, cfe-commits, aeubanks, MaskRay, jvesely, nhaehnle, hiraditya, kerbowa, llvm-commits

Tags: #llvm, #clang

Differential Revision: https://reviews.llvm.org/D83013
This commit is contained in:
Zequan Wu 2020-07-08 12:30:28 -07:00
parent 1d542f0ca8
commit 1fbb719470
17 changed files with 143 additions and 54 deletions

View File

@ -254,7 +254,6 @@ CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
CODEGENOPT(CallGraphProfile , 1, 0) ///< Run call graph profile.
/// Attempt to use register sized accesses to bit-fields in structures, when
/// possible.

View File

@ -620,6 +620,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
// Only enable CGProfilePass when using the integrated assembler, since
// non-integrated assemblers don't recognize the .cgprofile section.
PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
// Loop interleaving in the loop vectorizer has historically been set to be
@ -1144,7 +1147,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
PTO.CallGraphProfile = CodeGenOpts.CallGraphProfile;
// Only enable CGProfilePass when using the integrated assembler, since
// non-integrated assemblers don't recognize the .cgprofile section.
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
PTO.Coroutines = LangOpts.Coroutines;
PassInstrumentationCallbacks PIC;
@ -1562,7 +1567,9 @@ static void runThinLTOBackend(
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
Conf.PTO.CallGraphProfile = CGOpts.CallGraphProfile;
// Only enable CGProfilePass when using the integrated assembler, since
// non-integrated assemblers don't recognize the .cgprofile section.
Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS;
// Context sensitive profile.
if (CGOpts.hasProfileCSIRInstr()) {

View File

@ -860,7 +860,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.RerollLoops = Args.hasArg(OPT_freroll_loops);
Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
Opts.CallGraphProfile = !Opts.DisableIntegratedAS;
Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
Opts.SampleProfileFile =
std::string(Args.getLastArgValue(OPT_fprofile_sample_use_EQ));

View File

@ -103,6 +103,7 @@ void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
void initializeCGProfileLegacyPassPass(PassRegistry &);
void initializeCallGraphDOTPrinterPass(PassRegistry&);
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
void initializeCallGraphViewerPass(PassRegistry&);

View File

@ -282,6 +282,8 @@ ModulePass *createSampleProfileLoaderPass(StringRef Name);
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
raw_ostream *ThinLinkOS = nullptr);
ModulePass *createCGProfileLegacyPass();
} // End llvm namespace
#endif

View File

@ -156,6 +156,7 @@ public:
bool DisableTailCalls;
bool DisableUnrollLoops;
bool CallGraphProfile;
bool SLPVectorize;
bool LoopVectorize;
bool LoopsInterleaved;

View File

@ -19,11 +19,6 @@ namespace llvm {
/// New pass manager module pass for the call graph profile.
///
/// NOTE(review): the implementation lives in a separate .cpp file; from the
/// definitions visible alongside this header, the pass attaches a
/// "CG Profile" module flag built from per-function-pair counts.
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
public:
/// Entry point invoked by the new pass manager for module \p M.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
/// Writes \p Counts into \p M as module-flag metadata. The map key is a
/// pair of functions and the mapped value is a 64-bit count.
void addModuleFlags(
Module &M,
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
};
} // end namespace llvm

View File

@ -248,10 +248,6 @@ static cl::opt<bool>
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
static cl::opt<bool> EnableCallGraphProfile(
"enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
@ -267,7 +263,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
Coroutines = false;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = EnableCallGraphProfile;
CallGraphProfile = true;
}
extern cl::opt<bool> EnableHotColdSplit;

View File

@ -195,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
CallGraphProfile = true;
}
PassManagerBuilder::~PassManagerBuilder() {
@ -834,6 +835,10 @@ void PassManagerBuilder::populateModulePassManager(
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
// Add the "CG Profile" module flag based on Block Frequency Information.
if (CallGraphProfile)
MPM.add(createCGProfileLegacyPass());
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM

View File

@ -10,22 +10,47 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Instrumentation.h"
#include <array>
using namespace llvm;
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
/// Record the collected counts on \p M as a "CG Profile" module flag.
///
/// Every entry of \p Counts is turned into one metadata tuple holding the two
/// functions of the key pair followed by the count as an i64 constant. All
/// tuples are gathered under a single node, which is appended to the module
/// flags.
///
/// \param M      Module that receives the flag.
/// \param Counts Map from a pair of functions to its accumulated count.
/// \returns false when \p Counts is empty (the module is left untouched),
///          true once the flag has been added.
static bool
addModuleFlags(Module &M,
               MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
  // Nothing was collected, so there is nothing to attach.
  if (Counts.empty())
    return false;

  LLVMContext &Ctx = M.getContext();
  MDBuilder Builder(Ctx);
  auto *Int64Ty = Type::getInt64Ty(Ctx);

  std::vector<Metadata *> EdgeNodes;
  for (auto Entry : Counts) {
    Metadata *FirstFn = ValueAsMetadata::get(Entry.first.first);
    Metadata *SecondFn = ValueAsMetadata::get(Entry.first.second);
    Metadata *Weight =
        Builder.createConstant(ConstantInt::get(Int64Ty, Entry.second));

    Metadata *Fields[] = {FirstFn, SecondFn, Weight};
    EdgeNodes.push_back(MDNode::get(Ctx, Fields));
  }

  MDNode *Profile = MDNode::get(Ctx, EdgeNodes);
  M.addModuleFlag(Module::Append, "CG Profile", Profile);
  return true;
}
static bool runCGProfilePass(
Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
Function *CalledF, uint64_t NewCount) {
@ -35,14 +60,18 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
Count = SaturatingAdd(Count, NewCount);
};
// Ignore error here. Indirect calls are ignored if this fails.
(void)(bool)Symtab.create(M);
(void)(bool) Symtab.create(M);
for (auto &F : M) {
if (F.isDeclaration())
// Avoid the extra cost of running the passes needed for BFI when the function
// doesn't have an entry count. LazyBlockFrequencyInfoPass only exists in the
// LPM, so this shortcut is only taken when LazyBFI is set.
// TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
continue;
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto &BFI = GetBFI(F);
if (BFI.getEntryFreq() == 0)
continue;
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
TargetTransformInfo &TTI = GetTTI(F);
for (auto &BB : F) {
Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
if (!BBCount)
@ -69,28 +98,56 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
}
}
addModuleFlags(M, Counts);
return addModuleFlags(M, Counts);
}
namespace {
struct CGProfileLegacyPass final : public ModulePass {
static char ID;
CGProfileLegacyPass() : ModulePass(ID) {
initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<LazyBlockFrequencyInfoPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnModule(Module &M) override {
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
};
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
return runCGProfilePass(M, GetBFI, GetTTI, true);
}
};
} // namespace
// Out-of-class definition of the static ID member declared in
// CGProfileLegacyPass.
char CGProfileLegacyPass::ID = 0;
// Registers the legacy pass under the argument "cg-profile" with the
// description "Call Graph Profile".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags — confirm against the INITIALIZE_PASS macro.
INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
false)
/// Factory for the legacy pass manager flavour of the call graph profile pass.
/// \returns a newly allocated CGProfileLegacyPass.
ModulePass *llvm::createCGProfileLegacyPass() {
  ModulePass *LegacyPass = new CGProfileLegacyPass();
  return LegacyPass;
}
/// New pass manager entry point for the call graph profile pass.
///
/// Fetches the function analysis manager through the module proxy, wraps its
/// BlockFrequencyAnalysis and TargetIRAnalysis results in per-function
/// accessors, and runs the shared implementation with LazyBFI disabled.
///
/// \returns PreservedAnalyses::all() unconditionally.
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
  FunctionAnalysisManager &FuncAM =
      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto BlockFreqFor = [&FuncAM](Function &Fn) -> BlockFrequencyInfo & {
    return FuncAM.getResult<BlockFrequencyAnalysis>(Fn);
  };
  auto TargetInfoFor = [&FuncAM](Function &Fn) -> TargetTransformInfo & {
    return FuncAM.getResult<TargetIRAnalysis>(Fn);
  };

  // The boolean result is deliberately unused here; every analysis is
  // reported as preserved either way.
  (void)runCGProfilePass(M, BlockFreqFor, TargetInfoFor, /*LazyBFI=*/false);

  return PreservedAnalyses::all();
}
/// Attaches the collected counts to \p M as a "CG Profile" module flag.
///
/// Each entry of \p Counts becomes one metadata tuple: the two functions of
/// the key pair followed by the count as an i64 constant. Does nothing when
/// \p Counts is empty.
void CGProfilePass::addModuleFlags(
Module &M,
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
// No counts were collected, so leave the module untouched.
if (Counts.empty())
return;
LLVMContext &Context = M.getContext();
MDBuilder MDB(Context);
// One metadata node per map entry.
std::vector<Metadata *> Nodes;
for (auto E : Counts) {
// Tuple layout: {first function, second function, i64 count}.
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
ValueAsMetadata::get(E.first.second),
MDB.createConstant(ConstantInt::get(
Type::getInt64Ty(Context), E.second))};
Nodes.push_back(MDNode::get(Context, Vals));
}
// Wrap all tuples in a single node and append it to the module flags.
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
}

View File

@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeCGProfileLegacyPassPass(Registry);
initializeInstrOrderFileLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
initializeMemorySanitizerLegacyPassPass(Registry);

View File

@ -276,6 +276,12 @@
; GCN-O1-NEXT: Warn about non-applied transformations
; GCN-O1-NEXT: Alignment from assumptions
; GCN-O1-NEXT: Strip Unused Function Prototypes
; GCN-O1-NEXT: Call Graph Profile
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Lazy Branch Probability Analysis
; GCN-O1-NEXT: Lazy Block Frequency Analysis
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
@ -623,6 +629,12 @@
; GCN-O2-NEXT: Strip Unused Function Prototypes
; GCN-O2-NEXT: Dead Global Elimination
; GCN-O2-NEXT: Merge Duplicate Global Constants
; GCN-O2-NEXT: Call Graph Profile
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Lazy Branch Probability Analysis
; GCN-O2-NEXT: Lazy Block Frequency Analysis
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
@ -975,6 +987,12 @@
; GCN-O3-NEXT: Strip Unused Function Prototypes
; GCN-O3-NEXT: Dead Global Elimination
; GCN-O3-NEXT: Merge Duplicate Global Constants
; GCN-O3-NEXT: Call Graph Profile
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Lazy Branch Probability Analysis
; GCN-O3-NEXT: Lazy Block Frequency Analysis
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -passes cg-profile -S | FileCheck %s
; RUN: opt < %s -cg-profile -S | FileCheck %s
declare void @b()

View File

@ -1,11 +0,0 @@
; RUN: opt -debug-pass-manager -passes='default<O2>' %s 2>&1 |FileCheck %s --check-prefixes=DEFAULT
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF
; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON
;
; DEFAULT: Running pass: CGProfilePass
; OFF-NOT: Running pass: CGProfilePass
; ON: Running pass: CGProfilePass
define void @foo() {
ret void
}

View File

@ -280,6 +280,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information

View File

@ -285,6 +285,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information

View File

@ -266,6 +266,12 @@
; CHECK-NEXT: Strip Unused Function Prototypes
; CHECK-NEXT: Dead Global Elimination
; CHECK-NEXT: Merge Duplicate Global Constants
; CHECK-NEXT: Call Graph Profile
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information