forked from OSchip/llvm-project
AMDGPU/GlobalISel: Add types to special inputs
When passing special ABI inputs, we have no existing context for the type to use.
This commit is contained in:
parent
1d8cb09923
commit
f25d020c2e
|
@ -83,59 +83,63 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE(review): this listing looks like a rendered diff. Each std::make_pair
// return is presumably the removed line and the std::make_tuple return that
// follows it the replacement; confirm against the real file before building.
//
/// Look up the descriptor, register class and low-level type for one
/// preloaded value.
///
/// \param Value selects which preloaded value to look up.
/// \returns (in the std::make_tuple form) a pointer to the matching
/// ArgDescriptor member, or nullptr when that member tests false; the
/// register class listed for that case; and the LLT listed for that case.
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
||||
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
||||
AMDGPUFunctionArgInfo::getPreloadedValue(
|
||||
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
||||
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
||||
switch (Value) {
|
||||
// Private segment buffer: SGPR_128 register class, typed LLT::vector(4, 32).
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
|
||||
return std::make_pair(
|
||||
PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
|
||||
&AMDGPU::SGPR_128RegClass);
|
||||
return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
|
||||
: nullptr,
|
||||
&AMDGPU::SGPR_128RegClass, LLT::vector(4, 32));
|
||||
}
|
||||
// Implicit buffer pointer: SGPR_64, typed as a 64-bit pointer in
// AMDGPUAS::CONSTANT_ADDRESS.
case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
|
||||
return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass,
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
// Workgroup IDs and the private segment wave byte offset: SGPR_32,
// LLT::scalar(32).
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
|
||||
return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
|
||||
return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
|
||||
return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
||||
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
|
||||
return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
||||
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
|
||||
return std::make_pair(
|
||||
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass);
|
||||
return std::make_tuple(
|
||||
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
|
||||
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
||||
// Kernarg segment and implicit-arg pointers: SGPR_64, typed as 64-bit
// pointers in AMDGPUAS::CONSTANT_ADDRESS.
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
|
||||
return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass,
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
|
||||
return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass,
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
// Dispatch ID and flat scratch init: SGPR_64, but typed LLT::scalar(64)
// rather than as a pointer.
case AMDGPUFunctionArgInfo::DISPATCH_ID:
|
||||
return std::make_pair(DispatchID ? &DispatchID : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(DispatchID ? &DispatchID : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
|
||||
case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
|
||||
return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
|
||||
// Dispatch and queue pointers: SGPR_64, typed as 64-bit pointers in
// AMDGPUAS::CONSTANT_ADDRESS.
case AMDGPUFunctionArgInfo::DISPATCH_PTR:
|
||||
return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass,
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
case AMDGPUFunctionArgInfo::QUEUE_PTR:
|
||||
return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass);
|
||||
return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
|
||||
&AMDGPU::SGPR_64RegClass,
|
||||
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
||||
// Work-item IDs are the only cases in this switch that use a VGPR class
// (VGPR_32); they are typed LLT::scalar(32).
case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
|
||||
return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
||||
case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
|
||||
return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
||||
case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
|
||||
return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
|
||||
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
||||
}
|
||||
// Reached only when Value matched none of the cases above; every case returns.
llvm_unreachable("unexpected preloaded value type");
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/LowLevelTypeImpl.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -148,7 +149,7 @@ struct AMDGPUFunctionArgInfo {
|
|||
ArgDescriptor WorkItemIDY;
|
||||
ArgDescriptor WorkItemIDZ;
|
||||
|
||||
/// Look up the argument descriptor, register class and (in the std::tuple
/// form) the LLT associated with \p Value.
// NOTE(review): the std::pair and std::tuple lines look like the removed and
// added return types of a rendered diff; only one belongs in the real header.
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
||||
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
||||
getPreloadedValue(PreloadedValue Value) const;
|
||||
|
||||
static constexpr AMDGPUFunctionArgInfo fixedABILayout();
|
||||
|
|
|
@ -2442,7 +2442,8 @@ const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
|
|||
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
|
||||
const ArgDescriptor *Arg;
|
||||
const TargetRegisterClass *RC;
|
||||
std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
|
||||
LLT ArgTy;
|
||||
std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
|
||||
if (!Arg) {
|
||||
LLVM_DEBUG(dbgs() << "Required arg register missing\n");
|
||||
return nullptr;
|
||||
|
@ -3178,8 +3179,9 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
|
|||
|
||||
const ArgDescriptor *Arg;
|
||||
const TargetRegisterClass *RC;
|
||||
std::tie(Arg, RC)
|
||||
= MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
|
||||
LLT ArgTy;
|
||||
std::tie(Arg, RC, ArgTy) =
|
||||
MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
|
||||
if (!Arg)
|
||||
return false;
|
||||
|
||||
|
|
|
@ -1527,9 +1527,10 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
|||
|
||||
const ArgDescriptor *InputPtrReg;
|
||||
const TargetRegisterClass *RC;
|
||||
LLT ArgTy;
|
||||
|
||||
std::tie(InputPtrReg, RC)
|
||||
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
|
||||
std::tie(InputPtrReg, RC, ArgTy) =
|
||||
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
|
||||
|
||||
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
|
||||
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
|
||||
|
@ -1675,8 +1676,9 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
|
|||
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
|
||||
const ArgDescriptor *Reg;
|
||||
const TargetRegisterClass *RC;
|
||||
LLT Ty;
|
||||
|
||||
std::tie(Reg, RC) = MFI.getPreloadedValue(PVID);
|
||||
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
|
||||
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
|
||||
}
|
||||
|
||||
|
@ -2580,15 +2582,18 @@ void SITargetLowering::passSpecialInputs(
|
|||
for (auto InputID : InputRegs) {
|
||||
const ArgDescriptor *OutgoingArg;
|
||||
const TargetRegisterClass *ArgRC;
|
||||
LLT ArgTy;
|
||||
|
||||
std::tie(OutgoingArg, ArgRC) = CalleeArgInfo->getPreloadedValue(InputID);
|
||||
std::tie(OutgoingArg, ArgRC, ArgTy) =
|
||||
CalleeArgInfo->getPreloadedValue(InputID);
|
||||
if (!OutgoingArg)
|
||||
continue;
|
||||
|
||||
const ArgDescriptor *IncomingArg;
|
||||
const TargetRegisterClass *IncomingArgRC;
|
||||
std::tie(IncomingArg, IncomingArgRC)
|
||||
= CallerArgInfo.getPreloadedValue(InputID);
|
||||
LLT Ty;
|
||||
std::tie(IncomingArg, IncomingArgRC, Ty) =
|
||||
CallerArgInfo.getPreloadedValue(InputID);
|
||||
assert(IncomingArgRC == ArgRC);
|
||||
|
||||
// All special arguments are ints for now.
|
||||
|
@ -2621,24 +2626,25 @@ void SITargetLowering::passSpecialInputs(
|
|||
// packed.
|
||||
const ArgDescriptor *OutgoingArg;
|
||||
const TargetRegisterClass *ArgRC;
|
||||
LLT Ty;
|
||||
|
||||
std::tie(OutgoingArg, ArgRC) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
std::tie(OutgoingArg, ArgRC, Ty) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
std::tie(OutgoingArg, ArgRC, Ty) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
std::tie(OutgoingArg, ArgRC, Ty) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
if (!OutgoingArg)
|
||||
return;
|
||||
|
||||
const ArgDescriptor *IncomingArgX
|
||||
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
|
||||
const ArgDescriptor *IncomingArgY
|
||||
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
|
||||
const ArgDescriptor *IncomingArgZ
|
||||
= CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
|
||||
const ArgDescriptor *IncomingArgX = std::get<0>(
|
||||
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
|
||||
const ArgDescriptor *IncomingArgY = std::get<0>(
|
||||
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
|
||||
const ArgDescriptor *IncomingArgZ = std::get<0>(
|
||||
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
|
||||
|
||||
SDValue InputReg;
|
||||
SDLoc SL;
|
||||
|
|
|
@ -679,13 +679,13 @@ public:
|
|||
return ArgInfo;
|
||||
}
|
||||
|
||||
/// Forward to ArgInfo.getPreloadedValue(Value) and return its result
/// unchanged.
// NOTE(review): the std::pair and std::tuple lines look like the removed and
// added return types of a rendered diff; only one belongs in the real header.
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
|
||||
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
||||
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
||||
return ArgInfo.getPreloadedValue(Value);
|
||||
}
|
||||
|
||||
/// Return the register reported by the descriptor for \p Value, or a
/// default-constructed Register() when the descriptor pointer is null.
// NOTE(review): the two `auto Arg` lines look like the removed (.first) and
// added (std::get<0>) forms of a rendered diff; both read the first element
// of the lookup result.
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
||||
auto Arg = ArgInfo.getPreloadedValue(Value).first;
|
||||
auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
|
||||
// Guard the dereference: the lookup may hand back a null descriptor pointer.
return Arg ? Arg->getRegister() : Register();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue