From e7e23e3e91e32263579db5cea49302de7ccf88b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 5 Jul 2019 20:26:13 +0000
Subject: [PATCH] AMDGPU: Make AMDGPUPerfHintAnalysis an SCC pass

Add a string attribute instead of directly setting MachineFunctionInfo.
This avoids trying to get the analysis in the MachineFunctionInfo in a
way that doesn't work with the new pass manager. This will also avoid
re-visiting the call graph for every single function.

llvm-svn: 365241
---
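For illustration (sketch only, not part of the commit): the hints now travel
as string function attributes on the IR, so later consumers can re-read them
without reaching back into the analysis object. The helper names below are
hypothetical; only the attribute keys and the Function/Attribute calls come
from the patch.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"

  // Producer side: record the verdicts as string attributes, as the pass now
  // does at the end of AMDGPUPerfHint::runOnFunction.
  static void recordPerfHints(llvm::Function &F, bool MemBound, bool LimitWave) {
    if (MemBound)
      F.addFnAttr("amdgpu-memory-bound", "true");
    if (LimitWave)
      F.addFnAttr("amdgpu-wave-limiter", "true");
  }

  // Consumer side: re-read a hint later, mirroring the new code in the
  // AMDGPUMachineFunction constructor.
  static bool hasTrueStringAttr(const llvm::Function &F, llvm::StringRef Name) {
    llvm::Attribute A = F.getFnAttribute(Name);
    return A.isStringAttribute() && A.getValueAsString() == "true";
  }

Because the attributes persist on the Function, the machine-function side no
longer needs the pass's in-memory results to be alive when it is constructed.
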
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  1 -
 .../Target/AMDGPU/AMDGPUMachineFunction.cpp   | 14 ++--
 .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp  | 67 +++++++++++--------
 .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h    | 10 +--
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  2 +
 5 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index afd3d506ade8..b3589a02fcda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -139,7 +139,6 @@ public:

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AMDGPUArgumentUsageInfo>();
-    AU.addRequired<AMDGPUPerfHintAnalysis>();
     AU.addRequired<LegacyDivergenceAnalysis>();
 #ifdef EXPENSIVE_CHECKS
     AU.addRequired<DominatorTreeWrapperPass>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 05d5e2ec4007..0d3a1f1a769f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -29,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   // except reserved size is not correctly aligned.
   const Function &F = MF.getFunction();

-  if (auto *Resolver = MF.getMMI().getResolver()) {
-    if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
-          Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
-      MemoryBound = PHA->isMemoryBound(&F);
-      WaveLimiter = PHA->needsWaveLimiter(&F);
-    }
-  }
+  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
+  MemoryBound = MemBoundAttr.isStringAttribute() &&
+                MemBoundAttr.getValueAsString() == "true";
+
+  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
+  WaveLimiter = WaveLimitAttr.isStringAttribute() &&
+                WaveLimitAttr.getValueAsString() == "true";

   CallingConv::ID CC = F.getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index e9ebb912b08b..e6d41236d441 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -17,6 +17,7 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -71,7 +72,7 @@ public:
                  const TargetLowering *TLI_)
       : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

-  void runOnFunction(Function &F);
+  bool runOnFunction(Function &F);

 private:
   struct MemAccessInfo {
@@ -100,7 +101,7 @@ private:

   const TargetLowering *TLI;

-  void visit(const Function &F);
+  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
   static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
   static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

@@ -202,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
   return false;
 }

-void AMDGPUPerfHint::visit(const Function &F) {
-  auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
-  if (!FIP.second)
-    return;
-
-  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
+AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
+  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];

   LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

@@ -233,7 +230,6 @@ void AMDGPUPerfHint::visit(const Function &F) {
         if (&F == Callee) // Handle immediate recursion
           continue;

-        visit(*Callee);
         auto Loc = FIM.find(Callee);

         assert(Loc != FIM.end() && "No func info");
@@ -256,36 +252,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
       }
     }
   }
+
+  return &FI;
 }

-void AMDGPUPerfHint::runOnFunction(Function &F) {
-  if (FIM.find(&F) != FIM.end())
-    return;
-
+bool AMDGPUPerfHint::runOnFunction(Function &F) {
   const Module &M = *F.getParent();
   DL = &M.getDataLayout();
-  visit(F);

-  auto Loc = FIM.find(&F);
+  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
+      F.hasFnAttribute("amdgpu-memory-bound"))
+    return false;

-  assert(Loc != FIM.end() && "No func info");
-  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
+  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
+
+  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                     << '\n'
-                    << " IAMInst: " << Loc->second.IAMInstCount << '\n'
-                    << " LSMInst: " << Loc->second.LSMInstCount << '\n'
-                    << " TotalInst: " << Loc->second.InstCount << '\n');
+                    << " IAMInst: " << Info->IAMInstCount << '\n'
+                    << " LSMInst: " << Info->LSMInstCount << '\n'
+                    << " TotalInst: " << Info->InstCount << '\n');

-  auto &FI = Loc->second;
-
-  if (isMemBound(FI)) {
+  if (isMemBound(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
     NumMemBound++;
+    F.addFnAttr("amdgpu-memory-bound", "true");
   }

-  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
+  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
     NumLimitWave++;
+    F.addFnAttr("amdgpu-wave-limiter", "true");
   }
+
+  return true;
 }

 bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -364,17 +363,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
 }
 } // namespace

-bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
+bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
   if (!TPC)
     return false;

   const TargetMachine &TM = TPC->getTM<TargetMachine>();

-  const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
-  AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
-  Analyzer.runOnFunction(F);
-  return false;
+  bool Changed = false;
+  for (CallGraphNode *I : SCC) {
+    Function *F = I->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
+    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+
+    if (Analyzer.runOnFunction(*F))
+      Changed = true;
+  }
+
+  return Changed;
 }

 bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 88b9c96a9148..9599e09fbd96 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -1,4 +1,4 @@
-//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
+//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,18 +14,20 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
 #define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H

+
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"

 namespace llvm {

-struct AMDGPUPerfHintAnalysis : public FunctionPass {
+struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
   static char ID;

 public:
-  AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
+  AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}

-  bool runOnFunction(Function &F) override;
+  bool runOnSCC(CallGraphSCC &SCC) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 87a820a395d4..7414519aee16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -740,6 +740,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());

+  addPass(&AMDGPUPerfHintAnalysisID);
+
   TargetPassConfig::addCodeGenPrepare();

   if (EnableLoadStoreVectorizer)