AMDGPU/GlobalISel: Add types to special inputs

When passing special ABI inputs, we have no existing context for the
type to use.
This commit is contained in:
Matt Arsenault 2020-07-05 13:17:02 -04:00 committed by Matt Arsenault
parent 1d8cb09923
commit f25d020c2e
5 changed files with 72 additions and 59 deletions

View File

@ -83,59 +83,63 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
}
}
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
switch (Value) {
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
return std::make_pair(
PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
&AMDGPU::SGPR_128RegClass);
return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
: nullptr,
&AMDGPU::SGPR_128RegClass, LLT::vector(4, 32));
}
case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
&AMDGPU::SGPR_32RegClass);
return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
&AMDGPU::SGPR_32RegClass);
return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass);
return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return std::make_pair(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
&AMDGPU::SGPR_32RegClass);
return std::make_tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::DISPATCH_ID:
return std::make_pair(DispatchID ? &DispatchID : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(DispatchID ? &DispatchID : nullptr,
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
case AMDGPUFunctionArgInfo::DISPATCH_PTR:
return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::QUEUE_PTR:
return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
&AMDGPU::SGPR_64RegClass);
return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
&AMDGPU::SGPR_64RegClass,
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
&AMDGPU::VGPR_32RegClass);
return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
&AMDGPU::VGPR_32RegClass);
return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
&AMDGPU::VGPR_32RegClass);
return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
}
llvm_unreachable("unexpected preloaded value type");
}

View File

@ -12,6 +12,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Pass.h"
#include "llvm/Support/LowLevelTypeImpl.h"
namespace llvm {
@ -148,7 +149,7 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor WorkItemIDY;
ArgDescriptor WorkItemIDZ;
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(PreloadedValue Value) const;
static constexpr AMDGPUFunctionArgInfo fixedABILayout();

View File

@ -2442,7 +2442,8 @@ const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
LLT ArgTy;
std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
if (!Arg) {
LLVM_DEBUG(dbgs() << "Required arg register missing\n");
return nullptr;
@ -3178,8 +3179,9 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
std::tie(Arg, RC)
= MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
LLT ArgTy;
std::tie(Arg, RC, ArgTy) =
MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
if (!Arg)
return false;

View File

@ -1527,9 +1527,10 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
LLT ArgTy;
std::tie(InputPtrReg, RC)
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
std::tie(InputPtrReg, RC, ArgTy) =
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
@ -1675,8 +1676,9 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
const ArgDescriptor *Reg;
const TargetRegisterClass *RC;
LLT Ty;
std::tie(Reg, RC) = MFI.getPreloadedValue(PVID);
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
@ -2580,15 +2582,18 @@ void SITargetLowering::passSpecialInputs(
for (auto InputID : InputRegs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
std::tie(OutgoingArg, ArgRC) = CalleeArgInfo->getPreloadedValue(InputID);
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
const ArgDescriptor *IncomingArg;
const TargetRegisterClass *IncomingArgRC;
std::tie(IncomingArg, IncomingArgRC)
= CallerArgInfo.getPreloadedValue(InputID);
LLT Ty;
std::tie(IncomingArg, IncomingArgRC, Ty) =
CallerArgInfo.getPreloadedValue(InputID);
assert(IncomingArgRC == ArgRC);
// All special arguments are ints for now.
@ -2621,24 +2626,25 @@ void SITargetLowering::passSpecialInputs(
// packed.
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT Ty;
std::tie(OutgoingArg, ArgRC) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return;
const ArgDescriptor *IncomingArgX
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
const ArgDescriptor *IncomingArgY
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
const ArgDescriptor *IncomingArgZ
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
const ArgDescriptor *IncomingArgX = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
const ArgDescriptor *IncomingArgY = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
const ArgDescriptor *IncomingArgZ = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
SDValue InputReg;
SDLoc SL;

View File

@ -679,13 +679,13 @@ public:
return ArgInfo;
}
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
return ArgInfo.getPreloadedValue(Value);
}
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
auto Arg = ArgInfo.getPreloadedValue(Value).first;
auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
return Arg ? Arg->getRegister() : Register();
}