forked from OSchip/llvm-project
AMDGPU/GlobalISel: Add support for amdgpu_ps calling convention
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D45837
llvm-svn: 330767
This commit is contained in:
parent
a4e557f908
commit
c7709e1c29
|
@ -139,19 +139,49 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||||
unsigned NumArgs = F.arg_size();
|
unsigned NumArgs = F.arg_size();
|
||||||
Function::const_arg_iterator CurOrigArg = F.arg_begin();
|
Function::const_arg_iterator CurOrigArg = F.arg_begin();
|
||||||
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
|
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
|
||||||
|
unsigned PSInputNum = 0;
|
||||||
|
BitVector Skipped(NumArgs);
|
||||||
for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
|
for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
|
||||||
EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
|
EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
|
||||||
|
|
||||||
// We can only handle simple value types at the moment.
|
// We can only handle simple value types at the moment.
|
||||||
if (!ValEVT.isSimple())
|
|
||||||
return false;
|
|
||||||
MVT ValVT = ValEVT.getSimpleVT();
|
|
||||||
ISD::ArgFlagsTy Flags;
|
ISD::ArgFlagsTy Flags;
|
||||||
ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
|
ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
|
||||||
setArgFlags(OrigArg, i + 1, DL, F);
|
setArgFlags(OrigArg, i + 1, DL, F);
|
||||||
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
|
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
|
||||||
|
|
||||||
|
if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
|
||||||
|
!OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
|
||||||
|
PSInputNum <= 15) {
|
||||||
|
if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
|
||||||
|
Skipped.set(i);
|
||||||
|
++PSInputNum;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Info->markPSInputAllocated(PSInputNum);
|
||||||
|
if (!CurOrigArg->use_empty())
|
||||||
|
Info->markPSInputEnabled(PSInputNum);
|
||||||
|
|
||||||
|
++PSInputNum;
|
||||||
|
}
|
||||||
|
|
||||||
CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
|
CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
|
||||||
/*IsVarArg=*/false);
|
/*IsVarArg=*/false);
|
||||||
|
|
||||||
|
if (ValEVT.isVector()) {
|
||||||
|
EVT ElemVT = ValEVT.getVectorElementType();
|
||||||
|
if (!ValEVT.isSimple())
|
||||||
|
return false;
|
||||||
|
MVT ValVT = ElemVT.getSimpleVT();
|
||||||
|
bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
|
||||||
|
OrigArg.Flags, CCInfo);
|
||||||
|
if (!Res)
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
MVT ValVT = ValEVT.getSimpleVT();
|
||||||
|
if (!ValEVT.isSimple())
|
||||||
|
return false;
|
||||||
bool Res =
|
bool Res =
|
||||||
AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
|
AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
|
||||||
|
|
||||||
|
@ -159,20 +189,25 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||||
if (Res)
|
if (Res)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Function::const_arg_iterator Arg = F.arg_begin();
|
Function::const_arg_iterator Arg = F.arg_begin();
|
||||||
|
|
||||||
if (F.getCallingConv() == CallingConv::AMDGPU_VS) {
|
if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
|
||||||
for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
|
F.getCallingConv() == CallingConv::AMDGPU_PS) {
|
||||||
CCValAssign &VA = ArgLocs[i];
|
for (unsigned i = 0, OrigArgIdx = 0;
|
||||||
MRI.addLiveIn(VA.getLocReg(), VRegs[i]);
|
OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
|
||||||
|
if (Skipped.test(OrigArgIdx))
|
||||||
|
continue;
|
||||||
|
CCValAssign &VA = ArgLocs[i++];
|
||||||
|
MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
|
||||||
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
|
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
|
||||||
MIRBuilder.buildCopy(VRegs[i], VA.getLocReg());
|
MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
|
for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
|
||||||
// FIXME: We should be getting DebugInfo from the arguments somehow.
|
// FIXME: We should be getting DebugInfo from the arguments somehow.
|
||||||
CCValAssign &VA = ArgLocs[i];
|
CCValAssign &VA = ArgLocs[i];
|
||||||
lowerParameter(MIRBuilder, Arg->getType(),
|
lowerParameter(MIRBuilder, Arg->getType(),
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
; Check that we correctly skip over disabled inputs
|
||||||
|
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
|
||||||
|
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||||
|
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32)
|
||||||
|
define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 {
|
||||||
|
main_body:
|
||||||
|
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
attributes #1 = { "InitialPSInputAddr"="0x00002" }
|
Loading…
Reference in New Issue