AMDGPU: Make AMDGPUPerfHintAnalysis an SCC pass
Add a string attribute instead of directly setting MachineFunctionInfo. This avoids trying to get the analysis in the MachineFunctionInfo in a way that doesn't work with the new pass manager. This will also avoid re-visiting the call graph for every single function.

llvm-svn: 365241
parent 8d6ea2d48c
commit e7e23e3e91
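To make the hand-off concrete, here is a minimal sketch (not code from this commit; the helper names markMemoryBound and readMemoryBound are made up) of the pattern the message describes: the analysis records its per-function result as a string function attribute, and later consumers read the attribute off the IR function instead of querying the pass through MachineFunctionInfo.

// Sketch only: illustrates the attribute hand-off, not the commit's code.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Producer side: the analysis marks the function once it has a result.
static void markMemoryBound(Function &F) {
  F.addFnAttr("amdgpu-memory-bound", "true");
}

// Consumer side: construction of the MachineFunctionInfo only needs the IR
// function, so the analysis does not have to be alive at that point.
static bool readMemoryBound(const Function &F) {
  Attribute A = F.getFnAttribute("amdgpu-memory-bound");
  return A.isStringAttribute() && A.getValueAsString() == "true";
}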
@@ -139,7 +139,6 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AMDGPUArgumentUsageInfo>();
-    AU.addRequired<AMDGPUPerfHintAnalysis>();
     AU.addRequired<LegacyDivergenceAnalysis>();
 #ifdef EXPENSIVE_CHECKS
     AU.addRequired<DominatorTreeWrapperPass>();
@@ -29,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   // except reserved size is not correctly aligned.
   const Function &F = MF.getFunction();
 
-  if (auto *Resolver = MF.getMMI().getResolver()) {
-    if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
-          Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
-      MemoryBound = PHA->isMemoryBound(&F);
-      WaveLimiter = PHA->needsWaveLimiter(&F);
-    }
-  }
+  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
+  MemoryBound = MemBoundAttr.isStringAttribute() &&
+                MemBoundAttr.getValueAsString() == "true";
+
+  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
+  WaveLimiter = WaveLimitAttr.isStringAttribute() &&
+                WaveLimitAttr.getValueAsString() == "true";
 
   CallingConv::ID CC = F.getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
@@ -17,6 +17,7 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -71,7 +72,7 @@ public:
                  const TargetLowering *TLI_)
       : FIM(FIM_), DL(nullptr), TLI(TLI_) {}
 
-  void runOnFunction(Function &F);
+  bool runOnFunction(Function &F);
 
 private:
   struct MemAccessInfo {
@@ -100,7 +101,7 @@ private:
 
   const TargetLowering *TLI;
 
-  void visit(const Function &F);
+  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
   static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
   static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
 
@@ -202,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
   return false;
 }
 
-void AMDGPUPerfHint::visit(const Function &F) {
-  auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
-  if (!FIP.second)
-    return;
-
-  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
+AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
+  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
 
   LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
 
@@ -233,7 +230,6 @@ void AMDGPUPerfHint::visit(const Function &F) {
         if (&F == Callee) // Handle immediate recursion
           continue;
 
-        visit(*Callee);
         auto Loc = FIM.find(Callee);
 
         assert(Loc != FIM.end() && "No func info");
@@ -256,36 +252,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
       }
     }
   }
 
+  return &FI;
 }
 
-void AMDGPUPerfHint::runOnFunction(Function &F) {
-  if (FIM.find(&F) != FIM.end())
-    return;
+bool AMDGPUPerfHint::runOnFunction(Function &F) {
   const Module &M = *F.getParent();
   DL = &M.getDataLayout();
 
-  visit(F);
-  auto Loc = FIM.find(&F);
+  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
+      F.hasFnAttribute("amdgpu-memory-bound"))
+    return false;
 
-  assert(Loc != FIM.end() && "No func info");
-  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
+  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
+
+  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                     << '\n'
-                    << " IAMInst: " << Loc->second.IAMInstCount << '\n'
-                    << " LSMInst: " << Loc->second.LSMInstCount << '\n'
-                    << " TotalInst: " << Loc->second.InstCount << '\n');
+                    << " IAMInst: " << Info->IAMInstCount << '\n'
+                    << " LSMInst: " << Info->LSMInstCount << '\n'
+                    << " TotalInst: " << Info->InstCount << '\n');
 
-  auto &FI = Loc->second;
-  if (isMemBound(FI)) {
+  if (isMemBound(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
     NumMemBound++;
+    F.addFnAttr("amdgpu-memory-bound", "true");
   }
 
-  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
+  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
     NumLimitWave++;
+    F.addFnAttr("amdgpu-wave-limiter", "true");
   }
 
+  return true;
 }
 
 bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -364,17 +363,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
 }
 } // namespace
 
-bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
+bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
   if (!TPC)
     return false;
 
   const TargetMachine &TM = TPC->getTM<TargetMachine>();
-  const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
 
-  AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
-  Analyzer.runOnFunction(F);
-  return false;
+  bool Changed = false;
+  for (CallGraphNode *I : SCC) {
+    Function *F = I->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
+    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+
+    if (Analyzer.runOnFunction(*F))
+      Changed = true;
+  }
+
+  return Changed;
 }
 
 bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
@@ -1,4 +1,4 @@
-//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
+//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,18 +14,20 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
 #define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
 
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
 
-struct AMDGPUPerfHintAnalysis : public FunctionPass {
+struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
   static char ID;
 
 public:
-  AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
+  AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
 
-  bool runOnFunction(Function &F) override;
+  bool runOnSCC(CallGraphSCC &SCC) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
@@ -740,6 +740,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
 
+  addPass(&AMDGPUPerfHintAnalysisID);
+
   TargetPassConfig::addCodeGenPrepare();
 
   if (EnableLoadStoreVectorizer)
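For readers unfamiliar with the legacy pass type adopted above, a minimal sketch of the CallGraphSCCPass shape follows (the pass name ExampleSCCPass and the attribute string "example-marker" are made up, not part of this commit or of LLVM). runOnSCC is invoked once per strongly connected component, bottom-up over the call graph, so callee results are already available when a caller is visited; this is what lets the analysis avoid re-walking the call graph for every function.

// Sketch only, assuming the legacy pass manager as used in this commit.
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Function.h"

using namespace llvm;

namespace {
struct ExampleSCCPass : public CallGraphSCCPass {
  static char ID;
  ExampleSCCPass() : CallGraphSCCPass(ID) {}

  // Called once per SCC, callees before callers.
  bool runOnSCC(CallGraphSCC &SCC) override {
    bool Changed = false;
    for (CallGraphNode *Node : SCC) {
      Function *F = Node->getFunction();
      if (!F || F->isDeclaration())
        continue;
      // Record a per-function result as a string attribute so later
      // consumers can read it without re-running the analysis.
      if (!F->hasFnAttribute("example-marker")) {
        F->addFnAttr("example-marker", "true");
        Changed = true;
      }
    }
    return Changed;
  }
};
} // namespace

char ExampleSCCPass::ID = 0;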