[ARM] GlobalISel: Support array returns

These are a bit rare in practice, but they don't require anything special compared to array parameters, so support them as well. llvm-svn: 304137
2017-05-29 08:19:19 +00:00 · 2017-05-29 08:19:19 +00:00 · bf4aed2c38
parent d9fb2842e7
commit bf4aed2c38
2 changed files with 112 additions and 27 deletions
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@ -214,8 +214,9 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
  SmallVector<ArgInfo, 4> SplitVTs;
  ArgInfo RetInfo(VReg, Val->getType());
  setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
-  splitToValueTypes(RetInfo, SplitVTs, MF,
-                    [&](unsigned Reg, uint64_t Offset) {});
+  splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
+    MIRBuilder.buildExtract(Reg, VReg, Offset);
+  });

  CCAssignFn *AssignFn =
      TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@ -476,13 +477,34 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
      return false;

    ArgInfos.clear();
+    SmallVector<uint64_t, 8> RegOffsets;
+    SmallVector<unsigned, 8> SplitRegs;
    splitToValueTypes(OrigRet, ArgInfos, MF,
-                      [&](unsigned Reg, uint64_t Offset) {});
+                      [&](unsigned Reg, uint64_t Offset) {
+                        RegOffsets.push_back(Offset);
+                        SplitRegs.push_back(Reg);
+                      });

    auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
    CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
    if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
      return false;
+
+    if (!RegOffsets.empty()) {
+      // We have split the value and allocated each individual piece, now build
+      // it up again.
+      LLT Ty = MRI.getType(OrigRet.Reg);
+      unsigned Dst = MRI.createGenericVirtualRegister(Ty);
+      MIRBuilder.buildUndef(Dst);
+
+      for (unsigned i = 0; i < SplitRegs.size(); ++i) {
+        unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+        MIRBuilder.buildInsert(Tmp, Dst, SplitRegs[i], RegOffsets[i]);
+        Dst = Tmp;
+      }
+
+      MIRBuilder.buildCopy(OrigRet.Reg, Dst);
+    }
  }

  // We now know the size of the stack - update the ADJCALLSTACKDOWN
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@ -620,9 +620,9 @@ entry:
  ret float %r
 }

-declare arm_aapcscc void @tiny_int_arrays_target([2 x i32])
+declare arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32])

-define arm_aapcscc void @test_tiny_int_arrays([2 x i32] %arr) {
+define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) {
 ; CHECK-LABEL: name: test_tiny_int_arrays
 ; CHECK: liveins: %r0, %r1
 ; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0
@ -636,12 +636,29 @@ define arm_aapcscc void @test_tiny_int_arrays([2 x i32] %arr) {
 ; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR]](s64), 32
 ; CHECK: %r0 = COPY [[R0]]
 ; CHECK: %r1 = COPY [[R1]]
-; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1
+; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1
+; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2
+; CHECK: [[RES_ARR0:%[0-9]+]](s96) = IMPLICIT_DEF
+; CHECK: [[RES_ARR1:%[0-9]+]](s96) = G_INSERT [[RES_ARR0]], [[R0]](s32), 0
+; CHECK: [[RES_ARR2:%[0-9]+]](s96) = G_INSERT [[RES_ARR1]], [[R1]](s32), 32
+; CHECK: [[RES_ARR3:%[0-9]+]](s96) = G_INSERT [[RES_ARR2]], [[R2]](s32), 64
+; CHECK: [[RES_ARR:%[0-9]+]](s96) = COPY [[RES_ARR3]]
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BX_RET 14, _
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 32
+; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 64
+; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say
+; that composite types larger than 4 bytes should be passed through memory),
+; but it's what DAGISel does. We should fix it in the common code for both.
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: %r2 = COPY [[R2]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2
 entry:
-  notail call arm_aapcscc void @tiny_int_arrays_target([2 x i32] %arr)
-  ret void
+  %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr)
+  ret [3 x i32] %r
 }

 declare arm_aapcscc void @multiple_int_arrays_target([2 x i32], [2 x i32])
@ -662,6 +679,14 @@ define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %ar
 ; CHECK: [[ARG_ARR1_2:%[0-9]+]](s64) = G_INSERT [[ARG_ARR1_1]], [[R3]](s32), 32
 ; CHECK: [[ARG_ARR1:%[0-9]+]](s64) = COPY [[ARG_ARR1_2]]
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR0]](s64), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR0]](s64), 32
+; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR1]](s64), 0
+; CHECK: [[R3:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR1]](s64), 32
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: %r2 = COPY [[R2]]
+; CHECK: %r3 = COPY [[R3]]
 ; CHECK: BLX @multiple_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
 ; CHECK: BX_RET 14, _
@ -725,9 +750,9 @@ entry:
  ret void
 }

-declare arm_aapcscc void @fp_arrays_aapcs_target([3 x double])
+declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double])

-define arm_aapcscc void @test_fp_arrays_aapcs([3 x double] %arr) {
+define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
 ; CHECK-LABEL: name: test_fp_arrays_aapcs
 ; CHECK: fixedStack:
 ; CHECK: id: [[ARR2_ID:[0-9]+]], offset: 0, size: 8
@ -767,17 +792,27 @@ define arm_aapcscc void @test_fp_arrays_aapcs([3 x double] %arr) {
 ; CHECK: [[ARR2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0
 ; CHECK: [[ARR2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[ARR2_OFFSET]](s32)
 ; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8
-; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3
+; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[R_MERGED_0:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[R_MERGED_1:%[0-9]+]](s64) = G_INSERT [[R_MERGED_0]], [[R0]](s32), 0
+; CHECK: [[R_MERGED_2:%[0-9]+]](s64) = G_INSERT [[R_MERGED_1]], [[R1]](s32), 32
+; CHECK: [[R_MERGED:%[0-9]+]](s64) = COPY [[R_MERGED_2]]
 ; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BX_RET 14, _
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s64), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s64), 32
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
 entry:
-  notail call arm_aapcscc void @fp_arrays_aapcs_target([3 x double] %arr)
-  ret void
+  %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr)
+  ret [2 x float] %r
 }

-declare arm_aapcs_vfpcc void @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double])
+declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double])

-define arm_aapcs_vfpcc void @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) {
+define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) {
 ; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp
 ; CHECK: fixedStack:
 ; CHECK-DAG: id: [[Z0_ID:[0-9]+]], offset: 0, size: 8
@ -848,17 +883,35 @@ define arm_aapcs_vfpcc void @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x floa
 ; CHECK: [[Z3_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 24
 ; CHECK: [[Z3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z3_OFFSET]](s32)
 ; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8
-; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8
+; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8, implicit-def %s0, implicit-def %s1, implicit-def %s2, implicit-def %s3
+; CHECK: [[R0:%[0-9]+]](s32) = COPY %s0
+; CHECK: [[R1:%[0-9]+]](s32) = COPY %s1
+; CHECK: [[R2:%[0-9]+]](s32) = COPY %s2
+; CHECK: [[R3:%[0-9]+]](s32) = COPY %s3
+; CHECK: [[R_MERGED_0:%[0-9]+]](s128) = IMPLICIT_DEF
+; CHECK: [[R_MERGED_1:%[0-9]+]](s128) = G_INSERT [[R_MERGED_0]], [[R0]](s32), 0
+; CHECK: [[R_MERGED_2:%[0-9]+]](s128) = G_INSERT [[R_MERGED_1]], [[R1]](s32), 32
+; CHECK: [[R_MERGED_3:%[0-9]+]](s128) = G_INSERT [[R_MERGED_2]], [[R2]](s32), 64
+; CHECK: [[R_MERGED_4:%[0-9]+]](s128) = G_INSERT [[R_MERGED_3]], [[R3]](s32), 96
+; CHECK: [[R_MERGED:%[0-9]+]](s128) = COPY [[R_MERGED_4]]
 ; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BX_RET 14, _
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 32
+; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 64
+; CHECK: [[R3:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 96
+; CHECK: %s0 = COPY [[R0]]
+; CHECK: %s1 = COPY [[R1]]
+; CHECK: %s2 = COPY [[R2]]
+; CHECK: %s3 = COPY [[R3]]
+; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3
 entry:
-  notail call arm_aapcs_vfpcc void @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z)
-  ret void
+  %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z)
+  ret [4 x float] %r
 }

-declare arm_aapcscc void @tough_arrays_target([6 x [4 x i32]] %arr)
+declare arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr)

-define arm_aapcscc void @test_tough_arrays([6 x [4 x i32]] %arr) {
+define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) {
 ; CHECK-LABEL: name: test_tough_arrays
 ; CHECK: fixedStack:
 ; The parameters live in separate stack locations, one for each element that
@ -903,12 +956,22 @@ define arm_aapcscc void @test_tough_arrays([6 x [4 x i32]] %arr) {
 ; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 76
 ; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32)
 ; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4
-; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3
+; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[RES_ARR0:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[RES_ARR1:%[0-9]+]](s64) = G_INSERT [[RES_ARR0]], [[R0]](s32), 0
+; CHECK: [[RES_ARR2:%[0-9]+]](s64) = G_INSERT [[RES_ARR1]], [[R1]](s32), 32
+; CHECK: [[RES_ARR:%[0-9]+]](s64) = COPY [[RES_ARR2]]
 ; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BX_RET 14, _
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s64), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s64), 32
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
 entry:
-  notail call arm_aapcscc void @tough_arrays_target([6 x [4 x i32]] %arr)
-  ret void
+  %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr)
+  ret [2 x i32*] %r
 }

 define i32 @test_shufflevector_s32_v2s32(i32 %arg) {