From 7ecbe0c7a01848fce88dcf3b6977cec866e9938b Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Mon, 28 Dec 2020 13:48:34 -0800
Subject: [PATCH] [NewPM][AMDGPU] Port amdgpu-lower-kernel-attributes

And add it to the AMDGPU opt pipeline.

This is a function pass instead of a module pass (like the legacy pass)
because it's getting added to a CGSCCPassManager, and you can't put a
module pass in a CGSCCPassManager.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D93885
---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  5 +++
 .../AMDGPU/AMDGPULowerKernelAttributes.cpp    | 37 ++++++++++++-------
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  8 ++++
 .../CodeGen/AMDGPU/reqd-work-group-size.ll    |  1 +
 llvm/tools/opt/opt.cpp                        |  2 +-
 5 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6a0ba20e8026..623bbb2db325 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -115,6 +115,11 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
 extern char &AMDGPULowerKernelAttributesID;
 
+struct AMDGPULowerKernelAttributesPass
+    : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
 extern char &AMDGPUPropagateAttributesEarlyID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 00e12f808783..3406734d7c7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -19,7 +19,9 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
 
@@ -41,16 +43,11 @@ enum DispatchPackedOffsets {
 };
 
 class AMDGPULowerKernelAttributes : public ModulePass {
-  Module *Mod = nullptr;
-
 public:
   static char ID;
 
   AMDGPULowerKernelAttributes() : ModulePass(ID) {}
 
-  bool processUse(CallInst *CI);
-
-  bool doInitialization(Module &M) override;
   bool runOnModule(Module &M) override;
 
   StringRef getPassName() const override {
@@ -64,12 +61,7 @@ public:
 
 } // end anonymous namespace
 
-bool AMDGPULowerKernelAttributes::doInitialization(Module &M) {
-  Mod = &M;
-  return false;
-}
-
-bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
+static bool processUse(CallInst *CI) {
   Function *F = CI->getParent()->getParent();
 
   auto MD = F->getMetadata("reqd_work_group_size");
@@ -89,7 +81,7 @@ bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
   Value *GridSizeY = nullptr;
   Value *GridSizeZ = nullptr;
 
-  const DataLayout &DL = Mod->getDataLayout();
+  const DataLayout &DL = F->getParent()->getDataLayout();
 
   // We expect to see several GEP users, casted to the appropriate type and
   // loaded.
@@ -239,7 +231,7 @@ bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
   StringRef DispatchPtrName
     = Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
 
-  Function *DispatchPtr = Mod->getFunction(DispatchPtrName);
+  Function *DispatchPtr = M.getFunction(DispatchPtrName);
   if (!DispatchPtr) // Dispatch ptr not used.
     return false;
 
@@ -267,3 +259,22 @@ char AMDGPULowerKernelAttributes::ID = 0;
 ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
   return new AMDGPULowerKernelAttributes();
 }
+
+PreservedAnalyses
+AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
+  StringRef DispatchPtrName =
+      Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
+
+  Function *DispatchPtr = F.getParent()->getFunction(DispatchPtrName);
+  if (!DispatchPtr) // Dispatch ptr not used.
+    return PreservedAnalyses::all();
+
+  for (Instruction &I : instructions(F)) {
+    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+      if (CI->getCalledFunction() == DispatchPtr)
+        processUse(CI);
+    }
+  }
+
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 26d76dd7fede..7a09c91e62d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -508,6 +508,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
           PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
           return true;
         }
+        if (PassName == "amdgpu-lower-kernel-attributes") {
+          PM.addPass(AMDGPULowerKernelAttributesPass());
+          return true;
+        }
         return false;
       });
 
@@ -530,6 +534,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
         // but before SROA to increase SROA opportunities.
         FPM.addPass(InferAddressSpacesPass());
 
+        // This should run after inlining to have any chance of doing
+        // anything, and before other cleanup optimizations.
+        FPM.addPass(AMDGPULowerKernelAttributesPass());
+
         if (Level != PassBuilder::OptimizationLevel::O0) {
           // Promote alloca to vector before SROA and loop unroll. If we
           // manage to eliminate allocas before unroll we may choose to unroll
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 40b1db00dcdb..49caf2ec755b 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
 
 target datalayout = "n32"
 
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index cd2bb7fd9833..094d79d742dd 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -464,7 +464,7 @@ struct TimeTracerRAII {
 static bool shouldPinPassToLegacyPM(StringRef Pass) {
   std::vector<StringRef> PassNameExactToIgnore = {
       "amdgpu-simplifylib", "amdgpu-usenative", "amdgpu-promote-alloca",
-      "amdgpu-promote-alloca-to-vector"};
+      "amdgpu-promote-alloca-to-vector", "amdgpu-lower-kernel-attributes"};
   for (const auto &P : PassNameExactToIgnore)
     if (Pass == P)
       return false;