AMDGPU: Make AMDGPUPerfHintAnalysis an SCC pass

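Record the analysis results as string function attributes
("amdgpu-memory-bound" and "amdgpu-wave-limiter") instead of having
MachineFunctionInfo set its fields by querying the analysis directly.
This avoids trying to get the analysis from within the
MachineFunctionInfo constructor via the pass resolver, which doesn't
work with the new pass manager.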

This will also avoid re-visiting the call graph for every single
function.

llvm-svn: 365241
This commit is contained in:
Matt Arsenault 2019-07-05 20:26:13 +00:00
parent 8d6ea2d48c
commit e7e23e3e91
5 changed files with 53 additions and 41 deletions

View File

@ -139,7 +139,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>(); AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<AMDGPUPerfHintAnalysis>();
AU.addRequired<LegacyDivergenceAnalysis>(); AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS #ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>();

View File

@ -29,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
// except reserved size is not correctly aligned. // except reserved size is not correctly aligned.
const Function &F = MF.getFunction(); const Function &F = MF.getFunction();
if (auto *Resolver = MF.getMMI().getResolver()) { Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>( MemoryBound = MemBoundAttr.isStringAttribute() &&
Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) { MemBoundAttr.getValueAsString() == "true";
MemoryBound = PHA->isMemoryBound(&F);
WaveLimiter = PHA->needsWaveLimiter(&F); Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
} WaveLimiter = WaveLimitAttr.isStringAttribute() &&
} WaveLimitAttr.getValueAsString() == "true";
CallingConv::ID CC = F.getCallingConv(); CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)

View File

@ -17,6 +17,7 @@
#include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetPassConfig.h"
@ -71,7 +72,7 @@ public:
const TargetLowering *TLI_) const TargetLowering *TLI_)
: FIM(FIM_), DL(nullptr), TLI(TLI_) {} : FIM(FIM_), DL(nullptr), TLI(TLI_) {}
void runOnFunction(Function &F); bool runOnFunction(Function &F);
private: private:
struct MemAccessInfo { struct MemAccessInfo {
@ -100,7 +101,7 @@ private:
const TargetLowering *TLI; const TargetLowering *TLI;
void visit(const Function &F); AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F); static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F); static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@ -202,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
return false; return false;
} }
void AMDGPUPerfHint::visit(const Function &F) { AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo())); AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
if (!FIP.second)
return;
AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n'); LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
@ -233,7 +230,6 @@ void AMDGPUPerfHint::visit(const Function &F) {
if (&F == Callee) // Handle immediate recursion if (&F == Callee) // Handle immediate recursion
continue; continue;
visit(*Callee);
auto Loc = FIM.find(Callee); auto Loc = FIM.find(Callee);
assert(Loc != FIM.end() && "No func info"); assert(Loc != FIM.end() && "No func info");
@ -256,36 +252,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
} }
} }
} }
return &FI;
} }
void AMDGPUPerfHint::runOnFunction(Function &F) { bool AMDGPUPerfHint::runOnFunction(Function &F) {
if (FIM.find(&F) != FIM.end())
return;
const Module &M = *F.getParent(); const Module &M = *F.getParent();
DL = &M.getDataLayout(); DL = &M.getDataLayout();
visit(F); if (F.hasFnAttribute("amdgpu-wave-limiter") &&
auto Loc = FIM.find(&F); F.hasFnAttribute("amdgpu-memory-bound"))
return false;
assert(Loc != FIM.end() && "No func info"); const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
<< '\n' << '\n'
<< " IAMInst: " << Loc->second.IAMInstCount << '\n' << " IAMInst: " << Info->IAMInstCount << '\n'
<< " LSMInst: " << Loc->second.LSMInstCount << '\n' << " LSMInst: " << Info->LSMInstCount << '\n'
<< " TotalInst: " << Loc->second.InstCount << '\n'); << " TotalInst: " << Info->InstCount << '\n');
auto &FI = Loc->second; if (isMemBound(*Info)) {
if (isMemBound(FI)) {
LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n"); LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
NumMemBound++; NumMemBound++;
F.addFnAttr("amdgpu-memory-bound", "true");
} }
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) { if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n"); LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
NumLimitWave++; NumLimitWave++;
F.addFnAttr("amdgpu-wave-limiter", "true");
} }
return true;
} }
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) { bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@ -364,17 +363,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
} }
} // namespace } // namespace
bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) { bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC) if (!TPC)
return false; return false;
const TargetMachine &TM = TPC->getTM<TargetMachine>(); const TargetMachine &TM = TPC->getTM<TargetMachine>();
const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering()); bool Changed = false;
Analyzer.runOnFunction(F); for (CallGraphNode *I : SCC) {
return false; Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;
const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
if (Analyzer.runOnFunction(*F))
Changed = true;
}
return Changed;
} }
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {

View File

@ -1,4 +1,4 @@
//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===// //===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -14,18 +14,20 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H #ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H #define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/ValueMap.h" #include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h" #include "llvm/Pass.h"
namespace llvm { namespace llvm {
struct AMDGPUPerfHintAnalysis : public FunctionPass { struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
static char ID; static char ID;
public: public:
AMDGPUPerfHintAnalysis() : FunctionPass(ID) {} AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
bool runOnFunction(Function &F) override; bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll(); AU.setPreservesAll();

View File

@ -740,6 +740,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
EnableLowerKernelArguments) EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass()); addPass(createAMDGPULowerKernelArgumentsPass());
addPass(&AMDGPUPerfHintAnalysisID);
TargetPassConfig::addCodeGenPrepare(); TargetPassConfig::addCodeGenPrepare();
if (EnableLoadStoreVectorizer) if (EnableLoadStoreVectorizer)