forked from OSchip/llvm-project
AMDGPU: Make AMDGPUPerfHintAnalysis an SCC pass
Record the analysis results as string function attributes ("amdgpu-memory-bound" and "amdgpu-wave-limiter") instead of setting the fields in MachineFunctionInfo directly. This avoids having MachineFunctionInfo query the analysis in a way that does not work with the new pass manager, and it also avoids re-visiting the call graph for every single function. llvm-svn: 365241
This commit is contained in:
parent
8d6ea2d48c
commit
e7e23e3e91
|
@ -139,7 +139,6 @@ public:
|
|||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AMDGPUArgumentUsageInfo>();
|
||||
AU.addRequired<AMDGPUPerfHintAnalysis>();
|
||||
AU.addRequired<LegacyDivergenceAnalysis>();
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
|
|
|
@ -29,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
|
|||
// except reserved size is not correctly aligned.
|
||||
const Function &F = MF.getFunction();
|
||||
|
||||
if (auto *Resolver = MF.getMMI().getResolver()) {
|
||||
if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
|
||||
Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
|
||||
MemoryBound = PHA->isMemoryBound(&F);
|
||||
WaveLimiter = PHA->needsWaveLimiter(&F);
|
||||
}
|
||||
}
|
||||
Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
|
||||
MemoryBound = MemBoundAttr.isStringAttribute() &&
|
||||
MemBoundAttr.getValueAsString() == "true";
|
||||
|
||||
Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
|
||||
WaveLimiter = WaveLimitAttr.isStringAttribute() &&
|
||||
WaveLimitAttr.getValueAsString() == "true";
|
||||
|
||||
CallingConv::ID CC = F.getCallingConv();
|
||||
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
|
@ -71,7 +72,7 @@ public:
|
|||
const TargetLowering *TLI_)
|
||||
: FIM(FIM_), DL(nullptr), TLI(TLI_) {}
|
||||
|
||||
void runOnFunction(Function &F);
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
private:
|
||||
struct MemAccessInfo {
|
||||
|
@ -100,7 +101,7 @@ private:
|
|||
|
||||
const TargetLowering *TLI;
|
||||
|
||||
void visit(const Function &F);
|
||||
AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
|
||||
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
|
||||
static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
|
||||
|
||||
|
@ -202,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUPerfHint::visit(const Function &F) {
|
||||
auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
|
||||
if (!FIP.second)
|
||||
return;
|
||||
|
||||
AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
|
||||
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
|
||||
AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
|
||||
|
||||
LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
|
||||
|
||||
|
@ -233,7 +230,6 @@ void AMDGPUPerfHint::visit(const Function &F) {
|
|||
if (&F == Callee) // Handle immediate recursion
|
||||
continue;
|
||||
|
||||
visit(*Callee);
|
||||
auto Loc = FIM.find(Callee);
|
||||
|
||||
assert(Loc != FIM.end() && "No func info");
|
||||
|
@ -256,36 +252,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &FI;
|
||||
}
|
||||
|
||||
void AMDGPUPerfHint::runOnFunction(Function &F) {
|
||||
if (FIM.find(&F) != FIM.end())
|
||||
return;
|
||||
|
||||
bool AMDGPUPerfHint::runOnFunction(Function &F) {
|
||||
const Module &M = *F.getParent();
|
||||
DL = &M.getDataLayout();
|
||||
|
||||
visit(F);
|
||||
auto Loc = FIM.find(&F);
|
||||
if (F.hasFnAttribute("amdgpu-wave-limiter") &&
|
||||
F.hasFnAttribute("amdgpu-memory-bound"))
|
||||
return false;
|
||||
|
||||
assert(Loc != FIM.end() && "No func info");
|
||||
LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
|
||||
const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
|
||||
|
||||
LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
|
||||
<< '\n'
|
||||
<< " IAMInst: " << Loc->second.IAMInstCount << '\n'
|
||||
<< " LSMInst: " << Loc->second.LSMInstCount << '\n'
|
||||
<< " TotalInst: " << Loc->second.InstCount << '\n');
|
||||
<< " IAMInst: " << Info->IAMInstCount << '\n'
|
||||
<< " LSMInst: " << Info->LSMInstCount << '\n'
|
||||
<< " TotalInst: " << Info->InstCount << '\n');
|
||||
|
||||
auto &FI = Loc->second;
|
||||
|
||||
if (isMemBound(FI)) {
|
||||
if (isMemBound(*Info)) {
|
||||
LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
|
||||
NumMemBound++;
|
||||
F.addFnAttr("amdgpu-memory-bound", "true");
|
||||
}
|
||||
|
||||
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
|
||||
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
|
||||
LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
|
||||
NumLimitWave++;
|
||||
F.addFnAttr("amdgpu-wave-limiter", "true");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
|
||||
|
@ -364,17 +363,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
|
|||
}
|
||||
} // namespace
|
||||
|
||||
bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
|
||||
bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
|
||||
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
|
||||
if (!TPC)
|
||||
return false;
|
||||
|
||||
const TargetMachine &TM = TPC->getTM<TargetMachine>();
|
||||
const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
|
||||
|
||||
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
|
||||
Analyzer.runOnFunction(F);
|
||||
return false;
|
||||
bool Changed = false;
|
||||
for (CallGraphNode *I : SCC) {
|
||||
Function *F = I->getFunction();
|
||||
if (!F || F->isDeclaration())
|
||||
continue;
|
||||
|
||||
const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
|
||||
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
|
||||
|
||||
if (Analyzer.runOnFunction(*F))
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
|
||||
//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
|
@ -14,18 +14,20 @@
|
|||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
|
||||
|
||||
#include "llvm/Analysis/CallGraphSCCPass.h"
|
||||
#include "llvm/IR/ValueMap.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
struct AMDGPUPerfHintAnalysis : public FunctionPass {
|
||||
struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
|
||||
static char ID;
|
||||
|
||||
public:
|
||||
AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
|
||||
AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
|
||||
|
||||
bool runOnFunction(Function &F) override;
|
||||
bool runOnSCC(CallGraphSCC &SCC) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesAll();
|
||||
|
|
|
@ -740,6 +740,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
|
|||
EnableLowerKernelArguments)
|
||||
addPass(createAMDGPULowerKernelArgumentsPass());
|
||||
|
||||
addPass(&AMDGPUPerfHintAnalysisID);
|
||||
|
||||
TargetPassConfig::addCodeGenPrepare();
|
||||
|
||||
if (EnableLoadStoreVectorizer)
|
||||
|
|
Loading…
Reference in New Issue