From 9719f17011a2d5b4020eaaa04b6346c2d10db80c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 9 Jan 2021 10:44:29 -0500 Subject: [PATCH] AMDGPU: Move handling of allocation of fixed ABI inputs For the fixed ABI, set this in the initial argument constructor, rather than relying on the allocation logic to set the values. Also stop passing them for amdgpu_gfx, since the DAG path seems to skip these. I'm unclear on what amdgpu_gfx's expectations are. This will allow moving the special input registers out of the normal argument range. --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 3 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 42 +++- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 4 + .../irtranslator-call-return-values.ll | 222 ++++-------------- .../AMDGPU/GlobalISel/irtranslator-call.ll | 200 ++++------------ .../GlobalISel/irtranslator-indirect-call.ll | 44 +--- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 3 + 7 files changed, 141 insertions(+), 377 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 852a05b3c181..2430c6aee389 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -1295,7 +1295,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // after the ordinary user argument registers. SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs; - if (AMDGPUTargetMachine::EnableFixedFunctionABI) { + if (AMDGPUTargetMachine::EnableFixedFunctionABI && + Info.CallConv != CallingConv::AMDGPU_Gfx) { // With a fixed ABI, allocate fixed registers before user arguments. 
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 31cdcfba76f3..e73f2b4b38a9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1865,12 +1865,32 @@ static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo, return ArgDescriptor::createRegister(Reg); } -static ArgDescriptor allocateSGPR32Input(CCState &CCInfo) { - return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32); +// If this has a fixed position, we still should allocate the register in the +// CCInfo state. Technically we could get away with this for values passed +// outside of the normal argument range. +static void allocateFixedSGPRInputImpl(CCState &CCInfo, + const TargetRegisterClass *RC, + MCRegister Reg) { + Reg = CCInfo.AllocateReg(Reg); + assert(Reg != AMDGPU::NoRegister); + MachineFunction &MF = CCInfo.getMachineFunction(); + MF.addLiveIn(Reg, RC); } -static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) { - return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16); +static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg) { + if (Arg) { + allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, + Arg.getRegister()); + } else + Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32); +} + +static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg) { + if (Arg) { + allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, + Arg.getRegister()); + } else + Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16); } /// Allocate implicit function VGPR arguments at the end of allocated user @@ -1919,29 +1939,29 @@ void SITargetLowering::allocateSpecialInputSGPRs( // TODO: Unify handling with private memory pointers. 
if (Info.hasDispatchPtr()) - ArgInfo.DispatchPtr = allocateSGPR64Input(CCInfo); + allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); if (Info.hasQueuePtr()) - ArgInfo.QueuePtr = allocateSGPR64Input(CCInfo); + allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a // constant offset from the kernarg segment. if (Info.hasImplicitArgPtr()) - ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo); + allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr); if (Info.hasDispatchID()) - ArgInfo.DispatchID = allocateSGPR64Input(CCInfo); + allocateSGPR64Input(CCInfo, ArgInfo.DispatchID); // flat_scratch_init is not applicable for non-kernel functions. if (Info.hasWorkGroupIDX()) - ArgInfo.WorkGroupIDX = allocateSGPR32Input(CCInfo); + allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX); if (Info.hasWorkGroupIDY()) - ArgInfo.WorkGroupIDY = allocateSGPR32Input(CCInfo); + allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY); if (Info.hasWorkGroupIDZ()) - ArgInfo.WorkGroupIDZ = allocateSGPR32Input(CCInfo); + allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); } // Allocate special inputs passed in user SGPRs. diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 9a0cdc7b1f4d..13f4e7c7de86 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -49,6 +49,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't // have any calls. 
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI && + CC != CallingConv::AMDGPU_Gfx && (!isEntryFunction() || HasCalls); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { @@ -61,6 +62,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) } if (!isEntryFunction()) { + if (UseFixedABI) + ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; + // TODO: Pick a high register, and shift down, similar to a kernel. FrameOffsetReg = AMDGPU::SGPR33; StackPtrOffsetReg = AMDGPU::SGPR32; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 7699738afebb..9dff189a5068 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -128,47 +128,23 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GCN: 
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN: $vgpr0 = COPY [[C]](s32) - ; GCN: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN: $sgpr12 = COPY [[COPY15]](s32) - ; GCN: $sgpr13 = COPY [[COPY16]](s32) - ; GCN: $sgpr14 = COPY [[COPY17]](s32) - ; GCN: $vgpr31 = COPY [[COPY18]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, 
implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY20]](s32), [[MV]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; GCN: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; GCN: S_SETPC_B64_return [[COPY21]] + ; GCN: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; GCN: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN: S_SETPC_B64_return [[COPY5]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out ret void @@ -233,44 +209,20 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY 
[[COPY3]] - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN: $sgpr12 = COPY [[COPY13]](s32) - ; GCN: $sgpr13 = COPY [[COPY14]](s32) - ; GCN: $sgpr14 = COPY [[COPY15]](s32) - ; GCN: $vgpr31 = COPY [[COPY16]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.0, align 16, addrspace 5) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[LOAD]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) - ; GCN: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GCN: S_SETPC_B64_return [[COPY18]] + ; GCN: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY2]] %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() store volatile i1 %val, i1 addrspace(1)* undef ret void @@ -452,45 +404,21 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i8_func_void() 
#0 { ; GCN-LABEL: name: test_gfx_call_external_i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN: $sgpr12 = COPY [[COPY13]](s32) - ; GCN: $sgpr13 = COPY [[COPY14]](s32) - ; GCN: $sgpr14 = COPY [[COPY15]](s32) - ; GCN: $vgpr31 = COPY [[COPY16]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, 
implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GCN: S_SETPC_B64_return [[COPY19]] + ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY3]] %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() store volatile i8 %val, i8 addrspace(1)* undef ret void @@ -844,43 +772,19 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: 
[[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN: $sgpr12 = COPY [[COPY13]](s32) - ; GCN: $sgpr13 = COPY [[COPY14]](s32) - ; GCN: $sgpr14 = COPY [[COPY15]](s32) - ; GCN: $vgpr31 = COPY [[COPY16]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY18]](s32), [[DEF]](p1) :: 
(volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GCN: S_SETPC_B64_return [[COPY19]] + ; GCN: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY3]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -2562,48 +2466,24 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN: $sgpr12 = COPY [[COPY14]](s32) - ; GCN: $sgpr13 = COPY [[COPY15]](s32) - ; GCN: $sgpr14 = COPY [[COPY16]](s32) - ; GCN: $vgpr31 = COPY [[COPY17]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GCN: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[MV]](s64), [[COPY9]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) - ; GCN: 
[[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GCN: S_SETPC_B64_return [[COPY22]] + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GCN: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY6]] %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 955c75b10459..37094f4e8005 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -148,40 +148,16 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = 
G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY2]] call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -895,43 +871,19 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void 
@test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: $vgpr0 = COPY [[C]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK: $sgpr12 = COPY [[COPY14]](s32) - 
; CHECK: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK: $vgpr31 = COPY [[COPY17]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; CHECK: S_SETPC_B64_return [[COPY19]] + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -939,43 +891,19 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK: liveins: $sgpr4, $sgpr5, $sgpr14, $sgpr15, $vgpr31, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr5 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr12_sgpr13 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 + ; CHECK: liveins: $sgpr4, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: $sgpr15 = COPY [[C]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK: $vgpr31 = COPY [[COPY17]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: $sgpr4 = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, 
implicit-def $scc - ; CHECK: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; CHECK: S_SETPC_B64_return [[COPY19]] + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -3884,16 +3812,8 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) @@ -3902,31 +3822,15 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = 
load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -3936,16 +3840,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) @@ -3954,31 +3850,15 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) - ; CHECK: $sgpr15 = COPY [[ANYEXT]](s32) - ; CHECK: $sgpr16 = COPY [[LOAD2]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr15, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: $sgpr4 = COPY [[ANYEXT]](s32) + ; CHECK: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call 
amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index ab4562501fe8..44d662ef0947 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -56,42 +56,18 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; 
CHECK: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK: $vgpr31 = COPY [[COPY18]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK: S_SETPC_B64_return [[COPY20]] + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY4]] call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 6c384842b557..4e03df464581 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -2,6 +2,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s ; RUN: llc -global-isel 
-mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s +; Make sure this interacts well with -amdgpu-fixed-function-abi +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s + declare float @extern_func(float) #0 declare float @extern_func_many_args(<64 x float>) #0