diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index 291d35d486a4..53087adf9738 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -214,8 +214,9 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 4> SplitVTs; ArgInfo RetInfo(VReg, Val->getType()); setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(RetInfo, SplitVTs, MF, - [&](unsigned Reg, uint64_t Offset) {}); + splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { + MIRBuilder.buildExtract(Reg, VReg, Offset); + }); CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); @@ -476,13 +477,34 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); + SmallVector<uint64_t, 8> RegOffsets; + SmallVector<unsigned, 8> SplitRegs; splitToValueTypes(OrigRet, ArgInfos, MF, - [&](unsigned Reg, uint64_t Offset) {}); + [&](unsigned Reg, uint64_t Offset) { + RegOffsets.push_back(Offset); + SplitRegs.push_back(Reg); + }); auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; + + if (!RegOffsets.empty()) { + // We have split the value and allocated each individual piece, now build + // it up again. 
+ LLT Ty = MRI.getType(OrigRet.Reg); + unsigned Dst = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildUndef(Dst); + + for (unsigned i = 0; i < SplitRegs.size(); ++i) { + unsigned Tmp = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInsert(Tmp, Dst, SplitRegs[i], RegOffsets[i]); + Dst = Tmp; + } + + MIRBuilder.buildCopy(OrigRet.Reg, Dst); + } } // We now know the size of the stack - update the ADJCALLSTACKDOWN diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index c638b5f02cfa..f4aba880ff76 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -620,9 +620,9 @@ entry: ret float %r } -declare arm_aapcscc void @tiny_int_arrays_target([2 x i32]) +declare arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32]) -define arm_aapcscc void @test_tiny_int_arrays([2 x i32] %arr) { +define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK-LABEL: name: test_tiny_int_arrays ; CHECK: liveins: %r0, %r1 ; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 @@ -636,12 +636,29 @@ define arm_aapcscc void @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR]](s64), 32 ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] -; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1 +; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[RES_ARR0:%[0-9]+]](s96) = IMPLICIT_DEF +; CHECK: [[RES_ARR1:%[0-9]+]](s96) = G_INSERT [[RES_ARR0]], [[R0]](s32), 0 +; CHECK: [[RES_ARR2:%[0-9]+]](s96) = G_INSERT [[RES_ARR1]], [[R1]](s32), 32 +; CHECK: [[RES_ARR3:%[0-9]+]](s96) = G_INSERT [[RES_ARR2]], [[R2]](s32), 64 +; CHECK: 
[[RES_ARR:%[0-9]+]](s96) = COPY [[RES_ARR3]] ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 0 +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 32 +; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s96), 64 +; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say +; that composite types larger than 4 bytes should be passed through memory), +; but it's what DAGISel does. We should fix it in the common code for both. +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2 entry: - notail call arm_aapcscc void @tiny_int_arrays_target([2 x i32] %arr) - ret void + %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr) + ret [3 x i32] %r } declare arm_aapcscc void @multiple_int_arrays_target([2 x i32], [2 x i32]) @@ -662,6 +679,14 @@ define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %ar ; CHECK: [[ARG_ARR1_2:%[0-9]+]](s64) = G_INSERT [[ARG_ARR1_1]], [[R3]](s32), 32 ; CHECK: [[ARG_ARR1:%[0-9]+]](s64) = COPY [[ARG_ARR1_2]] ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR0]](s64), 0 +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR0]](s64), 32 +; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR1]](s64), 0 +; CHECK: [[R3:%[0-9]+]](s32) = G_EXTRACT [[ARG_ARR1]](s64), 32 +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] ; CHECK: BLX @multiple_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK: BX_RET 14, _ @@ -725,9 +750,9 @@ entry: ret void } -declare arm_aapcscc void @fp_arrays_aapcs_target([3 x double]) +declare arm_aapcscc 
[2 x float] @fp_arrays_aapcs_target([3 x double]) -define arm_aapcscc void @test_fp_arrays_aapcs([3 x double] %arr) { +define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK-LABEL: name: test_fp_arrays_aapcs ; CHECK: fixedStack: ; CHECK: id: [[ARR2_ID:[0-9]+]], offset: 0, size: 8 @@ -767,17 +792,27 @@ define arm_aapcscc void @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK: [[ARR2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0 ; CHECK: [[ARR2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[ARR2_OFFSET]](s32) ; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8 -; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R_MERGED_0:%[0-9]+]](s64) = IMPLICIT_DEF +; CHECK: [[R_MERGED_1:%[0-9]+]](s64) = G_INSERT [[R_MERGED_0]], [[R0]](s32), 0 +; CHECK: [[R_MERGED_2:%[0-9]+]](s64) = G_INSERT [[R_MERGED_1]], [[R1]](s32), 32 +; CHECK: [[R_MERGED:%[0-9]+]](s64) = COPY [[R_MERGED_2]] ; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s64), 0 +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s64), 32 +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 entry: - notail call arm_aapcscc void @fp_arrays_aapcs_target([3 x double] %arr) - ret void + %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr) + ret [2 x float] %r } -declare arm_aapcs_vfpcc void @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double]) +declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double]) -define 
arm_aapcs_vfpcc void @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { +define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { ; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp ; CHECK: fixedStack: ; CHECK-DAG: id: [[Z0_ID:[0-9]+]], offset: 0, size: 8 @@ -848,17 +883,35 @@ define arm_aapcs_vfpcc void @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x floa ; CHECK: [[Z3_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 24 ; CHECK: [[Z3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z3_OFFSET]](s32) ; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8 -; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8 +; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8, implicit-def %s0, implicit-def %s1, implicit-def %s2, implicit-def %s3 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %s0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %s1 +; CHECK: [[R2:%[0-9]+]](s32) = COPY %s2 +; CHECK: [[R3:%[0-9]+]](s32) = COPY %s3 +; CHECK: [[R_MERGED_0:%[0-9]+]](s128) = IMPLICIT_DEF +; CHECK: [[R_MERGED_1:%[0-9]+]](s128) = G_INSERT [[R_MERGED_0]], [[R0]](s32), 0 +; CHECK: [[R_MERGED_2:%[0-9]+]](s128) = G_INSERT [[R_MERGED_1]], [[R1]](s32), 32 +; CHECK: [[R_MERGED_3:%[0-9]+]](s128) = G_INSERT [[R_MERGED_2]], [[R2]](s32), 64 +; CHECK: [[R_MERGED_4:%[0-9]+]](s128) = G_INSERT [[R_MERGED_3]], [[R3]](s32), 96 +; CHECK: [[R_MERGED:%[0-9]+]](s128) = COPY [[R_MERGED_4]] ; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 0 +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 32 +; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 64 +; CHECK: [[R3:%[0-9]+]](s32) = G_EXTRACT [[R_MERGED]](s128), 96 +; CHECK: %s0 
= COPY [[R0]] +; CHECK: %s1 = COPY [[R1]] +; CHECK: %s2 = COPY [[R2]] +; CHECK: %s3 = COPY [[R3]] +; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3 entry: - notail call arm_aapcs_vfpcc void @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) - ret void + %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) + ret [4 x float] %r } -declare arm_aapcscc void @tough_arrays_target([6 x [4 x i32]] %arr) +declare arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) -define arm_aapcscc void @test_tough_arrays([6 x [4 x i32]] %arr) { +define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK-LABEL: name: test_tough_arrays ; CHECK: fixedStack: ; The parameters live in separate stack locations, one for each element that @@ -903,12 +956,22 @@ define arm_aapcscc void @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 76 ; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) ; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 -; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[RES_ARR0:%[0-9]+]](s64) = IMPLICIT_DEF +; CHECK: [[RES_ARR1:%[0-9]+]](s64) = G_INSERT [[RES_ARR0]], [[R0]](s32), 0 +; CHECK: [[RES_ARR2:%[0-9]+]](s64) = G_INSERT [[RES_ARR1]], [[R1]](s32), 32 +; CHECK: [[RES_ARR:%[0-9]+]](s64) = COPY [[RES_ARR2]] ; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s64), 0 
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RES_ARR]](s64), 32 +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 entry: - notail call arm_aapcscc void @tough_arrays_target([6 x [4 x i32]] %arr) - ret void + %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) + ret [2 x i32*] %r } define i32 @test_shufflevector_s32_v2s32(i32 %arg) {