AMDGPU: Add analysis pass for function argument info

This will allow only adding necessary inputs to callee functions
that need special inputs forwarded from the kernel.

llvm-svn: 309996
This commit is contained in:
Matt Arsenault 2017-08-03 22:30:46 +00:00
parent 2d30c64ae3
commit 7016f13450
6 changed files with 326 additions and 7 deletions

View File

@ -54,6 +54,8 @@ FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;
@ -124,8 +126,9 @@ void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAMDGPUISelDag(
TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
@ -167,6 +170,8 @@ extern char &AMDGPUUnifyDivergentExitNodesID;
ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();

View File

@ -0,0 +1,127 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
"Argument Register Usage Information Storage", false, true)
/// Writes a one-line, newline-terminated description of this descriptor to
/// \p OS: "<not set>", the register (formatted through PrintReg with \p TRI),
/// or the stack offset.
void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet())
    OS << "<not set>\n";
  else if (isRegister())
    OS << "Reg " << PrintReg(getRegister(), TRI) << '\n';
  else
    OS << "Stack offset " << getStackOffset() << '\n';
}
char AMDGPUArgumentUsageInfo::ID = 0;
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
return false;
}
bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
ArgInfoMap.clear();
return false;
}
/// Dumps the recorded argument descriptors of every function in ArgInfoMap to
/// \p OS. \p M is not used.
///
/// Each descriptor is streamed through operator<<(raw_ostream &,
/// const ArgDescriptor &), which terminates its own line, so every label below
/// starts a new line of output.
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &Entry : ArgInfoMap) {
    const Function *Fn = Entry.first;
    const AMDGPUFunctionArgInfo &Info = Entry.second;

    OS << "Arguments for " << Fn->getName() << '\n';

    OS << " PrivateSegmentBuffer: " << Info.PrivateSegmentBuffer;
    OS << " DispatchPtr: " << Info.DispatchPtr;
    OS << " QueuePtr: " << Info.QueuePtr;
    OS << " KernargSegmentPtr: " << Info.KernargSegmentPtr;
    OS << " DispatchID: " << Info.DispatchID;
    OS << " FlatScratchInit: " << Info.FlatScratchInit;
    OS << " PrivateSegmentSize: " << Info.PrivateSegmentSize;
    OS << " GridWorkgroupCountX: " << Info.GridWorkGroupCountX;
    OS << " GridWorkgroupCountY: " << Info.GridWorkGroupCountY;
    OS << " GridWorkgroupCountZ: " << Info.GridWorkGroupCountZ;

    OS << " WorkGroupIDX: " << Info.WorkGroupIDX;
    OS << " WorkGroupIDY: " << Info.WorkGroupIDY;
    OS << " WorkGroupIDZ: " << Info.WorkGroupIDZ;
    OS << " WorkGroupInfo: " << Info.WorkGroupInfo;
    OS << " PrivateSegmentWaveByteOffset: "
       << Info.PrivateSegmentWaveByteOffset;

    OS << " ImplicitBufferPtr: " << Info.ImplicitBufferPtr;

    OS << " WorkItemIDX " << Info.WorkItemIDX;
    OS << " WorkItemIDY " << Info.WorkItemIDY;
    OS << " WorkItemIDZ " << Info.WorkItemIDZ;

    // Blank separator line after each function's listing.
    OS << '\n';
  }
}
/// Maps a PreloadedValue enumerator to the descriptor stored for it and the
/// register class associated with that input.
///
/// \returns a pair whose first element is a pointer to the matching
/// ArgDescriptor member, or nullptr when that descriptor is not set, and whose
/// second element is the register class for the input.
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
AMDGPUFunctionArgInfo::getPreloadedValue(
    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
  using ResultTy =
      std::pair<const ArgDescriptor *, const TargetRegisterClass *>;

  // Pairs a descriptor with its register class, replacing an unset descriptor
  // with a null pointer.
  const auto Describe = [](const ArgDescriptor &Arg,
                           const TargetRegisterClass *RC) -> ResultTy {
    return ResultTy(Arg.isSet() ? &Arg : nullptr, RC);
  };

  switch (Value) {
  // 128-bit SGPR input.
  case PRIVATE_SEGMENT_BUFFER:
    return Describe(PrivateSegmentBuffer, &AMDGPU::SGPR_128RegClass);

  // 64-bit SGPR inputs.
  case DISPATCH_PTR:
    return Describe(DispatchPtr, &AMDGPU::SGPR_64RegClass);
  case QUEUE_PTR:
    return Describe(QueuePtr, &AMDGPU::SGPR_64RegClass);
  case KERNARG_SEGMENT_PTR:
    return Describe(KernargSegmentPtr, &AMDGPU::SGPR_64RegClass);
  case DISPATCH_ID:
    return Describe(DispatchID, &AMDGPU::SGPR_64RegClass);
  case FLAT_SCRATCH_INIT:
    return Describe(FlatScratchInit, &AMDGPU::SGPR_64RegClass);
  case IMPLICIT_BUFFER_PTR:
    return Describe(ImplicitBufferPtr, &AMDGPU::SGPR_64RegClass);

  // 32-bit SGPR inputs.
  case WORKGROUP_ID_X:
    return Describe(WorkGroupIDX, &AMDGPU::SGPR_32RegClass);
  case WORKGROUP_ID_Y:
    return Describe(WorkGroupIDY, &AMDGPU::SGPR_32RegClass);
  case WORKGROUP_ID_Z:
    return Describe(WorkGroupIDZ, &AMDGPU::SGPR_32RegClass);
  case PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return Describe(PrivateSegmentWaveByteOffset, &AMDGPU::SGPR_32RegClass);

  // 32-bit VGPR inputs.
  case WORKITEM_ID_X:
    return Describe(WorkItemIDX, &AMDGPU::VGPR_32RegClass);
  case WORKITEM_ID_Y:
    return Describe(WorkItemIDY, &AMDGPU::VGPR_32RegClass);
  case WORKITEM_ID_Z:
    return Describe(WorkItemIDZ, &AMDGPU::VGPR_32RegClass);
  }

  llvm_unreachable("unexpected preloaded value type");
}

View File

@ -0,0 +1,171 @@
//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
namespace llvm {
class Function;
class raw_ostream;
class SISubtarget;
class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
/// Location of a single function input: either a register or a stack offset,
/// together with a flag recording whether the location has been assigned at
/// all. A default-constructed descriptor is "not set".
struct ArgDescriptor {
private:
  friend struct AMDGPUFunctionArgInfo;

  // Exactly one member is meaningful, selected by IsStack.
  union {
    unsigned Register;
    unsigned StackOffset;
  };

  bool IsStack : 1; // True when the location is a stack offset.
  bool IsSet : 1;   // False until a location has been assigned.

  // Intentionally not explicit: existing users initialise descriptors through
  // an implicit conversion from an integer.
  ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
      : IsStack(IsStack), IsSet(IsSet) {
    // Activate the union member that the accessors will later read, instead
    // of always writing Register and reading StackOffset for stack arguments.
    if (IsStack)
      StackOffset = Val;
    else
      Register = Val;
  }

public:
  /// Creates a set descriptor that refers to register \p Reg.
  static ArgDescriptor createRegister(unsigned Reg) {
    return ArgDescriptor(Reg, false, true);
  }

  /// Creates a set descriptor that refers to stack offset \p Offset.
  static ArgDescriptor createStack(unsigned Offset) {
    return ArgDescriptor(Offset, true, true);
  }

  /// \returns true if a location has been assigned.
  bool isSet() const {
    return IsSet;
  }

  /// Same as isSet(); explicit so it only applies in boolean contexts.
  explicit operator bool() const {
    return isSet();
  }

  /// \returns true if the descriptor is not a stack location. Note that this
  /// is also true for a descriptor that is not set.
  bool isRegister() const {
    return !IsStack;
  }

  /// \returns the register. Must not be called on a stack descriptor.
  unsigned getRegister() const {
    assert(!IsStack && "descriptor holds a stack offset, not a register");
    return Register;
  }

  /// \returns the stack offset. Must only be called on a stack descriptor.
  unsigned getStackOffset() const {
    assert(IsStack && "descriptor holds a register, not a stack offset");
    return StackOffset;
  }

  /// Prints a one-line description to \p OS; defined out of line.
  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
};
/// Streams \p Arg to \p OS via ArgDescriptor::print, without register info.
/// \returns \p OS so that insertions can be chained.
inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
  Arg.print(OS, /*TRI=*/nullptr);
  return OS;
}
/// Per-function record of where each special input is located. Every member
/// is an ArgDescriptor that starts out "not set".
struct AMDGPUFunctionArgInfo {
  /// Identifiers accepted by getPreloadedValue(). Values below
  /// FIRST_VGPR_VALUE name SGPR inputs; the remaining ones name VGPR inputs.
  enum PreloadedValue {
    // SGPRS:
    PRIVATE_SEGMENT_BUFFER = 0,
    DISPATCH_PTR = 1,
    QUEUE_PTR = 2,
    KERNARG_SEGMENT_PTR = 3,
    DISPATCH_ID = 4,
    FLAT_SCRATCH_INIT = 5,
    WORKGROUP_ID_X = 10,
    WORKGROUP_ID_Y = 11,
    WORKGROUP_ID_Z = 12,
    PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
    IMPLICIT_BUFFER_PTR = 15,

    // VGPRS:
    FIRST_VGPR_VALUE = 16,
    WORKITEM_ID_X = FIRST_VGPR_VALUE,
    WORKITEM_ID_Y = 17,
    WORKITEM_ID_Z = 18
  };

  // Kernel input registers setup for the HSA ABI in allocation order.

  // User SGPRs in kernels
  // XXX - Can these require argument spills?
  ArgDescriptor PrivateSegmentBuffer;
  ArgDescriptor DispatchPtr;
  ArgDescriptor QueuePtr;
  ArgDescriptor KernargSegmentPtr;
  ArgDescriptor DispatchID;
  ArgDescriptor FlatScratchInit;
  ArgDescriptor PrivateSegmentSize;
  ArgDescriptor GridWorkGroupCountX;
  ArgDescriptor GridWorkGroupCountY;
  ArgDescriptor GridWorkGroupCountZ;

  // System SGPRs in kernels.
  ArgDescriptor WorkGroupIDX;
  ArgDescriptor WorkGroupIDY;
  ArgDescriptor WorkGroupIDZ;
  ArgDescriptor WorkGroupInfo;
  ArgDescriptor PrivateSegmentWaveByteOffset;

  // Input registers for non-HSA ABI
  // Default-constructed like every other member; the previous "= 0"
  // initialiser produced the same not-set descriptor through an implicit
  // integer conversion.
  ArgDescriptor ImplicitBufferPtr;

  // VGPRs inputs. These are always v0, v1 and v2 for entry functions.
  ArgDescriptor WorkItemIDX;
  ArgDescriptor WorkItemIDY;
  ArgDescriptor WorkItemIDZ;

  /// Looks up the descriptor and register class for \p Value. The descriptor
  /// pointer is null when the corresponding member is not set.
  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(PreloadedValue Value) const;
};
/// Immutable pass that stores an AMDGPUFunctionArgInfo per function, keyed by
/// the Function's address.
class AMDGPUArgumentUsageInfo : public ImmutablePass {
private:
  // Returned by lookupFuncArgInfo() for functions that have no recorded entry.
  static const AMDGPUFunctionArgInfo ExternFunctionInfo;

  DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;

public:
  static char ID;

  AMDGPUArgumentUsageInfo() : ImmutablePass(ID) {}

  /// Declares that this pass preserves all analyses.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

  bool doInitialization(Module &M) override;
  bool doFinalization(Module &M) override;

  void print(raw_ostream &OS, const Module *M = nullptr) const override;

  /// Records \p ArgInfo for \p F, replacing any entry already stored for it.
  void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
    ArgInfoMap[&F] = ArgInfo;
  }

  /// \returns the entry recorded for \p F. When none exists, \p F is expected
  /// to be a declaration (asserted) and the shared ExternFunctionInfo is
  /// returned instead.
  const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const {
    const auto Entry = ArgInfoMap.find(&F);
    if (Entry != ArgInfoMap.end())
      return Entry->second;

    assert(F.isDeclaration());
    return ExternFunctionInfo;
  }
};
} // end namespace llvm
#endif

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
@ -70,12 +71,18 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel){
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
: SelectionDAGISel(*TM, OptLevel) {
AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
}
~AMDGPUDAGToDAGISel() override = default;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
SelectionDAGISel::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
@ -206,9 +213,15 @@ private:
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
CodeGenOpt::Level OptLevel) {
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

View File

@ -140,6 +140,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600PacketizerPass(*PR);
initializeR600ExpandSpecialInstrsPassPass(*PR);
initializeR600VectorRegMergerPass(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@ -152,6 +153,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
@ -638,7 +640,7 @@ bool AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
return false;
}

View File

@ -21,6 +21,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
AMDGPUArgumentUsageInfo.cpp
AMDGPUAsmPrinter.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp