AMDGPU/GlobalISel: Add support for amdgpu_ps calling convention

Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D45837
llvm-svn: 330767
2018-04-24 20:51:28 +00:00 · 2018-04-24 20:51:28 +00:00 · c7709e1c29
parent a4e557f908
commit c7709e1c29
2 changed files with 65 additions and 14 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@ -139,40 +139,75 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+  unsigned PSInputNum = 0;
+  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
-    if (!ValEVT.isSimple())
-      return false;
-    MVT ValVT = ValEVT.getSimpleVT();
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
+
+    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
+        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
+        PSInputNum <= 15) {
+      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
+        Skipped.set(i);
+        ++PSInputNum;
+        continue;
+      }
+
+      Info->markPSInputAllocated(PSInputNum);
+      if (!CurOrigArg->use_empty())
+        Info->markPSInputEnabled(PSInputNum);
+
+      ++PSInputNum;
+    }
+
    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);
-    bool Res =
-        AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

-    // Fail if we don't know how to handle this type.
-    if (Res)
-      return false;
+    if (ValEVT.isVector()) {
+      EVT ElemVT = ValEVT.getVectorElementType();
+      if (!ValEVT.isSimple())
+        return false;
+      MVT ValVT = ElemVT.getSimpleVT();
+      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
+                          OrigArg.Flags, CCInfo);
+      if (!Res)
+        return false;
+    } else {
+      MVT ValVT = ValEVT.getSimpleVT();
+      if (!ValEVT.isSimple())
+        return false;
+      bool Res =
+          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
+
+      // Fail if we don't know how to handle this type.
+      if (Res)
+        return false;
+    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

-  if (F.getCallingConv() == CallingConv::AMDGPU_VS) {
-    for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
-      CCValAssign &VA = ArgLocs[i];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[i]);
+  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
+      F.getCallingConv() == CallingConv::AMDGPU_PS) {
+    for (unsigned i = 0, OrigArgIdx = 0;
+         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
+       if (Skipped.test(OrigArgIdx))
+          continue;
+      CCValAssign &VA = ArgLocs[i++];
+      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[i], VA.getLocReg());
+      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

-  for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
+  for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
    // FIXME: We should be getting DebugInfo from the arguments some how.
    CCValAssign &VA = ArgLocs[i];
    lowerParameter(MIRBuilder, Arg->getType(),
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@ -0,0 +1,16 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+
+; Check that we correctly skip over disabled inputs: %psinput0 is unused and should be skipped, so %psinput1 is expected in $vgpr0.
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
+; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32)
+define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 {
+main_body:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)  #0
+
+attributes #0 = { nounwind }
+attributes #1 = { "InitialPSInputAddr"="0x00002" }