forked from OSchip/llvm-project
AMDGPU: Add analysis pass for function argument info
This will allow only adding necessary inputs to callee functions that need special inputs forwarded from the kernel. llvm-svn: 309996
This commit is contained in:
parent
2d30c64ae3
commit
7016f13450
|
@ -54,6 +54,8 @@ FunctionPass *createAMDGPUCodeGenPreparePass();
|
|||
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
|
||||
FunctionPass *createAMDGPURewriteOutArgumentsPass();
|
||||
|
||||
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
|
||||
|
||||
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
|
||||
extern char &AMDGPUMachineCFGStructurizerID;
|
||||
|
||||
|
@ -124,8 +126,9 @@ void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
|
|||
extern char &AMDGPUPromoteAllocaID;
|
||||
|
||||
Pass *createAMDGPUStructurizeCFGPass();
|
||||
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createAMDGPUISelDag(
|
||||
TargetMachine *TM = nullptr,
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
|
||||
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
|
||||
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
|
||||
FunctionPass *createAMDGPUAnnotateUniformValues();
|
||||
|
@ -167,6 +170,8 @@ extern char &AMDGPUUnifyDivergentExitNodesID;
|
|||
ImmutablePass *createAMDGPUAAWrapperPass();
|
||||
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
|
||||
|
||||
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
|
||||
|
||||
Target &getTheAMDGPUTarget();
|
||||
Target &getTheGCNTarget();
|
||||
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUArgumentUsageInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
|
||||
|
||||
INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
|
||||
"Argument Register Usage Information Storage", false, true)
|
||||
|
||||
/// Writes a one-line, newline-terminated description of this descriptor.
///
/// Output is "<not set>" for an unset descriptor, "Reg <reg>" (formatted with
/// PrintReg and \p TRI) for a register, and "Stack offset <n>" otherwise.
///
/// \param OS  Stream to write to.
/// \param TRI Register info forwarded to PrintReg.
void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet())
    OS << "<not set>\n";
  else if (isRegister())
    OS << "Reg " << PrintReg(getRegister(), TRI) << '\n';
  else
    OS << "Stack offset " << getStackOffset() << '\n';
}
|
||||
|
||||
// Static pass identifier; the constructor hands it to ImmutablePass.
char AMDGPUArgumentUsageInfo::ID = 0;
|
||||
|
||||
// Value-initialized argument info that lookupFuncArgInfo() returns for
// functions with no entry in ArgInfoMap.
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
|
||||
|
||||
/// Module initialization hook. Performs no work; \p M is unused.
/// \returns false unconditionally.
bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Module finalization hook. Drops every recorded per-function entry; \p M is
/// unused.
/// \returns false unconditionally.
bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
|
||||
// Entries are keyed by Function pointer, so discard them all here.
ArgInfoMap.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Prints the recorded argument descriptors of every function in ArgInfoMap.
///
/// For each function a header line is written, followed by one labelled line
/// per descriptor (the ArgDescriptor stream operator terminates its own line)
/// and a trailing blank line. Every label uses the same "Name: " form.
///
/// \param OS Stream to write to.
/// \param M  Unused.
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &FI : ArgInfoMap) {
    const AMDGPUFunctionArgInfo &Info = FI.second;

    OS << "Arguments for " << FI.first->getName() << '\n'
       << " PrivateSegmentBuffer: " << Info.PrivateSegmentBuffer
       << " DispatchPtr: " << Info.DispatchPtr
       << " QueuePtr: " << Info.QueuePtr
       << " KernargSegmentPtr: " << Info.KernargSegmentPtr
       << " DispatchID: " << Info.DispatchID
       << " FlatScratchInit: " << Info.FlatScratchInit
       << " PrivateSegmentSize: " << Info.PrivateSegmentSize
       << " GridWorkgroupCountX: " << Info.GridWorkGroupCountX
       << " GridWorkgroupCountY: " << Info.GridWorkGroupCountY
       << " GridWorkgroupCountZ: " << Info.GridWorkGroupCountZ
       << " WorkGroupIDX: " << Info.WorkGroupIDX
       << " WorkGroupIDY: " << Info.WorkGroupIDY
       << " WorkGroupIDZ: " << Info.WorkGroupIDZ
       << " WorkGroupInfo: " << Info.WorkGroupInfo
       << " PrivateSegmentWaveByteOffset: "
       << Info.PrivateSegmentWaveByteOffset
       << " ImplicitBufferPtr: " << Info.ImplicitBufferPtr
       // These three labels previously lacked the ':' separator.
       << " WorkItemIDX: " << Info.WorkItemIDX
       << " WorkItemIDY: " << Info.WorkItemIDY
       << " WorkItemIDZ: " << Info.WorkItemIDZ
       << '\n';
  }
}
|
||||
|
||||
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
||||
AMDGPUFunctionArgInfo::getPreloadedValue(
|
||||
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
||||
switch (Value) {
|
||||
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
|
||||
return std::make_pair(
|
||||
PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
|
||||
&AMDGPU::SGPR_128RegClass);
|
||||
}
|
||||
case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
|
||||
return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
|
||||
return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
|
||||
return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
|
||||
return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
|
||||
return std::make_pair(
|
||||
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
|
||||
return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::DISPATCH_ID:
|
||||
return std::make_pair(DispatchID ? &DispatchID : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
|
||||
return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::DISPATCH_PTR:
|
||||
return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::QUEUE_PTR:
|
||||
return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
|
||||
return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
|
||||
return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
|
||||
return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
}
|
||||
llvm_unreachable("unexpected preloaded value type");
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Function;
|
||||
class raw_ostream;
|
||||
class SISubtarget;
|
||||
class TargetMachine;
|
||||
class TargetRegisterClass;
|
||||
class TargetRegisterInfo;
|
||||
|
||||
struct ArgDescriptor {
|
||||
private:
|
||||
friend struct AMDGPUFunctionArgInfo;
|
||||
|
||||
union {
|
||||
unsigned Register;
|
||||
unsigned StackOffset;
|
||||
};
|
||||
|
||||
bool IsStack : 1;
|
||||
bool IsSet : 1;
|
||||
|
||||
ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
|
||||
: Register(Val), IsStack(IsStack), IsSet(IsSet) {}
|
||||
public:
|
||||
static ArgDescriptor createRegister(unsigned Reg) {
|
||||
return ArgDescriptor(Reg, false, true);
|
||||
}
|
||||
|
||||
static ArgDescriptor createStack(unsigned Reg) {
|
||||
return ArgDescriptor(Reg, true, true);
|
||||
}
|
||||
|
||||
bool isSet() const {
|
||||
return IsSet;
|
||||
}
|
||||
|
||||
explicit operator bool() const {
|
||||
return isSet();
|
||||
}
|
||||
|
||||
bool isRegister() const {
|
||||
return !IsStack;
|
||||
}
|
||||
|
||||
unsigned getRegister() const {
|
||||
assert(!IsStack);
|
||||
return Register;
|
||||
}
|
||||
|
||||
unsigned getStackOffset() const {
|
||||
assert(IsStack);
|
||||
return StackOffset;
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
|
||||
};
|
||||
|
||||
/// Streams \p Arg by calling ArgDescriptor::print with its default
/// TargetRegisterInfo argument (nullptr).
/// \returns \p OS, to allow chaining.
inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
|
||||
Arg.print(OS);
|
||||
return OS;
|
||||
}
|
||||
|
||||
struct AMDGPUFunctionArgInfo {
|
||||
enum PreloadedValue {
|
||||
// SGPRS:
|
||||
PRIVATE_SEGMENT_BUFFER = 0,
|
||||
DISPATCH_PTR = 1,
|
||||
QUEUE_PTR = 2,
|
||||
KERNARG_SEGMENT_PTR = 3,
|
||||
DISPATCH_ID = 4,
|
||||
FLAT_SCRATCH_INIT = 5,
|
||||
WORKGROUP_ID_X = 10,
|
||||
WORKGROUP_ID_Y = 11,
|
||||
WORKGROUP_ID_Z = 12,
|
||||
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
|
||||
IMPLICIT_BUFFER_PTR = 15,
|
||||
|
||||
// VGPRS:
|
||||
FIRST_VGPR_VALUE = 16,
|
||||
WORKITEM_ID_X = FIRST_VGPR_VALUE,
|
||||
WORKITEM_ID_Y = 17,
|
||||
WORKITEM_ID_Z = 18
|
||||
};
|
||||
|
||||
// Kernel input registers setup for the HSA ABI in allocation order.
|
||||
|
||||
// User SGPRs in kernels
|
||||
// XXX - Can these require argument spills?
|
||||
ArgDescriptor PrivateSegmentBuffer;
|
||||
ArgDescriptor DispatchPtr;
|
||||
ArgDescriptor QueuePtr;
|
||||
ArgDescriptor KernargSegmentPtr;
|
||||
ArgDescriptor DispatchID;
|
||||
ArgDescriptor FlatScratchInit;
|
||||
ArgDescriptor PrivateSegmentSize;
|
||||
ArgDescriptor GridWorkGroupCountX;
|
||||
ArgDescriptor GridWorkGroupCountY;
|
||||
ArgDescriptor GridWorkGroupCountZ;
|
||||
|
||||
// System SGPRs in kernels.
|
||||
ArgDescriptor WorkGroupIDX;
|
||||
ArgDescriptor WorkGroupIDY;
|
||||
ArgDescriptor WorkGroupIDZ;
|
||||
ArgDescriptor WorkGroupInfo;
|
||||
ArgDescriptor PrivateSegmentWaveByteOffset;
|
||||
|
||||
// Input registers for non-HSA ABI
|
||||
ArgDescriptor ImplicitBufferPtr = 0;
|
||||
|
||||
// VGPRs inputs. These are always v0, v1 and v2 for entry functions.
|
||||
ArgDescriptor WorkItemIDX;
|
||||
ArgDescriptor WorkItemIDY;
|
||||
ArgDescriptor WorkItemIDZ;
|
||||
|
||||
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
||||
getPreloadedValue(PreloadedValue Value) const;
|
||||
};
|
||||
|
||||
class AMDGPUArgumentUsageInfo : public ImmutablePass {
|
||||
private:
|
||||
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
|
||||
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
bool doFinalization(Module &M) override;
|
||||
|
||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
||||
|
||||
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
|
||||
ArgInfoMap[&F] = ArgInfo;
|
||||
}
|
||||
|
||||
const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const {
|
||||
auto I = ArgInfoMap.find(&F);
|
||||
if (I == ArgInfoMap.end()) {
|
||||
assert(F.isDeclaration());
|
||||
return ExternFunctionInfo;
|
||||
}
|
||||
|
||||
return I->second;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
|
@ -13,6 +13,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUArgumentUsageInfo.h"
|
||||
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
|
@ -70,12 +71,18 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
|
|||
AMDGPUAS AMDGPUASI;
|
||||
|
||||
public:
|
||||
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
|
||||
: SelectionDAGISel(TM, OptLevel){
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
|
||||
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
|
||||
: SelectionDAGISel(*TM, OptLevel) {
|
||||
AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
|
||||
}
|
||||
~AMDGPUDAGToDAGISel() override = default;
|
||||
|
||||
// Requires AMDGPUArgumentUsageInfo, then adds the analysis usage declared by
// the SelectionDAGISel base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<AMDGPUArgumentUsageInfo>();
|
||||
SelectionDAGISel::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
void Select(SDNode *N) override;
|
||||
StringRef getPassName() const override;
|
||||
|
@ -206,9 +213,15 @@ private:
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
|
||||
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
|
||||
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
|
||||
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
|
||||
|
||||
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
|
||||
/// DAG, ready for instruction scheduling.
|
||||
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
|
||||
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
return new AMDGPUDAGToDAGISel(TM, OptLevel);
|
||||
}
|
||||
|
|
|
@ -140,6 +140,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
|||
initializeR600PacketizerPass(*PR);
|
||||
initializeR600ExpandSpecialInstrsPassPass(*PR);
|
||||
initializeR600VectorRegMergerPass(*PR);
|
||||
initializeAMDGPUDAGToDAGISelPass(*PR);
|
||||
initializeSILowerI1CopiesPass(*PR);
|
||||
initializeSIFixSGPRCopiesPass(*PR);
|
||||
initializeSIFixVGPRCopiesPass(*PR);
|
||||
|
@ -152,6 +153,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
|||
initializeAMDGPUAlwaysInlinePass(*PR);
|
||||
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
|
||||
initializeAMDGPUAnnotateUniformValuesPass(*PR);
|
||||
initializeAMDGPUArgumentUsageInfoPass(*PR);
|
||||
initializeAMDGPULowerIntrinsicsPass(*PR);
|
||||
initializeAMDGPUPromoteAllocaPass(*PR);
|
||||
initializeAMDGPUCodeGenPreparePass(*PR);
|
||||
|
@ -638,7 +640,7 @@ bool AMDGPUPassConfig::addPreISel() {
|
|||
}
|
||||
|
||||
bool AMDGPUPassConfig::addInstSelector() {
|
||||
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
|
||||
addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ add_llvm_target(AMDGPUCodeGen
|
|||
AMDGPUAlwaysInlinePass.cpp
|
||||
AMDGPUAnnotateKernelFeatures.cpp
|
||||
AMDGPUAnnotateUniformValues.cpp
|
||||
AMDGPUArgumentUsageInfo.cpp
|
||||
AMDGPUAsmPrinter.cpp
|
||||
AMDGPUCallLowering.cpp
|
||||
AMDGPUCodeGenPrepare.cpp
|
||||
|
|
Loading…
Reference in New Issue