forked from OSchip/llvm-project
[NewPM][AMDGPU] Port amdgpu-lower-kernel-attributes
Also add the pass to the AMDGPU opt pipeline. The new pass manager version is a function pass rather than a module pass (as the legacy pass is) because it is added to a CGSCCPassManager, and a module pass cannot be placed in a CGSCCPassManager. Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D93885
This commit is contained in:
parent
b980bed34b
commit
7ecbe0c7a0
|
@ -115,6 +115,11 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
|
|||
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
|
||||
extern char &AMDGPULowerKernelAttributesID;
|
||||
|
||||
/// New pass manager counterpart of the legacy AMDGPULowerKernelAttributes
/// pass. It is declared at function granularity: run() receives a single
/// Function together with a FunctionAnalysisManager and reports which
/// analyses remain valid through the returned PreservedAnalyses.
struct AMDGPULowerKernelAttributesPass
|
||||
: PassInfoMixin<AMDGPULowerKernelAttributesPass> {
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
};
|
||||
|
||||
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
|
||||
extern char &AMDGPUPropagateAttributesEarlyID;
|
||||
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
|
@ -41,16 +43,11 @@ enum DispatchPackedOffsets {
|
|||
};
|
||||
|
||||
class AMDGPULowerKernelAttributes : public ModulePass {
|
||||
Module *Mod = nullptr;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AMDGPULowerKernelAttributes() : ModulePass(ID) {}
|
||||
|
||||
bool processUse(CallInst *CI);
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
bool runOnModule(Module &M) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
|
@ -64,12 +61,7 @@ public:
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Stores a pointer to \p M in the Mod member so the rest of the legacy pass
/// can reach the module (e.g. for its data layout and function lookup).
/// Always returns false.
bool AMDGPULowerKernelAttributes::doInitialization(Module &M) {
|
||||
Mod = &M;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
|
||||
static bool processUse(CallInst *CI) {
|
||||
Function *F = CI->getParent()->getParent();
|
||||
|
||||
auto MD = F->getMetadata("reqd_work_group_size");
|
||||
|
@ -89,7 +81,7 @@ bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
|
|||
Value *GridSizeY = nullptr;
|
||||
Value *GridSizeZ = nullptr;
|
||||
|
||||
const DataLayout &DL = Mod->getDataLayout();
|
||||
const DataLayout &DL = F->getParent()->getDataLayout();
|
||||
|
||||
// We expect to see several GEP users, casted to the appropriate type and
|
||||
// loaded.
|
||||
|
@ -239,7 +231,7 @@ bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
|
|||
StringRef DispatchPtrName
|
||||
= Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
|
||||
|
||||
Function *DispatchPtr = Mod->getFunction(DispatchPtrName);
|
||||
Function *DispatchPtr = M.getFunction(DispatchPtrName);
|
||||
if (!DispatchPtr) // Dispatch ptr not used.
|
||||
return false;
|
||||
|
||||
|
@ -267,3 +259,22 @@ char AMDGPULowerKernelAttributes::ID = 0;
|
|||
/// Factory for the legacy pass: allocates a new AMDGPULowerKernelAttributes
/// object with `new` and returns it through a ModulePass pointer.
/// NOTE(review): the caller presumably takes ownership of the returned
/// object -- confirm against the legacy pass manager's conventions.
ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
|
||||
return new AMDGPULowerKernelAttributes();
|
||||
}
|
||||
|
||||
/// New pass manager entry point: processes every call to the
/// amdgcn_dispatch_ptr intrinsic found in \p F.
///
/// The intrinsic's declaration is looked up by name in the module that owns
/// \p F. If the module has no such declaration, nothing in it can call the
/// intrinsic, so the function is left untouched. Otherwise each CallInst in
/// \p F whose callee is that declaration is handed to processUse().
///
/// \param F  Function to scan.
/// \param AM Analysis manager supplied by the pass manager (not queried here).
/// \returns PreservedAnalyses::all() when no call site was rewritten, and
///          PreservedAnalyses::none() when processUse() reported a change, so
///          that stale analysis results are not reused after the IR changed.
PreservedAnalyses
AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
  StringRef DispatchPtrName =
      Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);

  Function *DispatchPtr = F.getParent()->getFunction(DispatchPtrName);
  if (!DispatchPtr) // Dispatch ptr not used.
    return PreservedAnalyses::all();

  // Accumulate processUse()'s result instead of discarding it; the pass must
  // not claim that all analyses are preserved once it has modified the IR.
  bool MadeChange = false;
  for (Instruction &I : instructions(F)) {
    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
      if (CI->getCalledFunction() == DispatchPtr)
        MadeChange |= processUse(CI);
    }
  }

  return MadeChange ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
|
||||
|
|
|
@ -508,6 +508,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
|
|||
PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
|
||||
return true;
|
||||
}
|
||||
if (PassName == "amdgpu-lower-kernel-attributes") {
|
||||
PM.addPass(AMDGPULowerKernelAttributesPass());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
|
@ -530,6 +534,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
|
|||
// but before SROA to increase SROA opportunities.
|
||||
FPM.addPass(InferAddressSpacesPass());
|
||||
|
||||
// This should run after inlining to have any chance of doing
|
||||
// anything, and before other cleanup optimizations.
|
||||
FPM.addPass(AMDGPULowerKernelAttributesPass());
|
||||
|
||||
if (Level != PassBuilder::OptimizationLevel::O0) {
|
||||
// Promote alloca to vector before SROA and loop unroll. If we
|
||||
// manage to eliminate allocas before unroll we may choose to unroll
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
|
||||
|
||||
target datalayout = "n32"
|
||||
|
||||
|
|
|
@ -464,7 +464,7 @@ struct TimeTracerRAII {
|
|||
static bool shouldPinPassToLegacyPM(StringRef Pass) {
|
||||
std::vector<StringRef> PassNameExactToIgnore = {
|
||||
"amdgpu-simplifylib", "amdgpu-usenative", "amdgpu-promote-alloca",
|
||||
"amdgpu-promote-alloca-to-vector"};
|
||||
"amdgpu-promote-alloca-to-vector", "amdgpu-lower-kernel-attributes"};
|
||||
for (const auto &P : PassNameExactToIgnore)
|
||||
if (Pass == P)
|
||||
return false;
|
||||
|
|
Loading…
Reference in New Issue