AMDGPU: Move handling of allocation of fixed ABI inputs

For the fixed ABI, set the fixed input registers in the initial argument
constructor, rather than relying on the allocation logic to set the values.
Also stop passing them for amdgpu_gfx, since the DAG path seems to skip
these. I'm unclear on what amdgpu_gfx's expectations are. This will
allow moving the special input registers out of the normal argument
range.
This commit is contained in:
Matt Arsenault 2021-01-09 10:44:29 -05:00
parent 916c4121c1
commit 9719f17011
7 changed files with 141 additions and 377 deletions

View File

@ -1295,7 +1295,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
if (AMDGPUTargetMachine::EnableFixedFunctionABI) {
if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
Info.CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;

View File

@ -1865,12 +1865,32 @@ static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
return ArgDescriptor::createRegister(Reg);
}
static ArgDescriptor allocateSGPR32Input(CCState &CCInfo) {
return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32);
// An input with a fixed position still gets its register allocated in the
// CCInfo state. Technically this might be avoidable for values passed outside
// of the normal argument range.
//
// Allocates \p Reg in \p CCInfo, asserts that the allocation did not yield
// AMDGPU::NoRegister, and adds the register as a live-in of the machine
// function with register class \p RC.
static void allocateFixedSGPRInputImpl(CCState &CCInfo,
                                       const TargetRegisterClass *RC,
                                       MCRegister Reg) {
  const MCRegister Allocated = CCInfo.AllocateReg(Reg);
  assert(Allocated != AMDGPU::NoRegister);

  CCInfo.getMachineFunction().addLiveIn(Allocated, RC);
}
static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) {
return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
// Allocate the SGPR_32 input described by \p Arg.
//
// When \p Arg tests false, a new descriptor is obtained from
// allocateSGPR32InputImpl and stored into \p Arg. Otherwise the register
// already recorded in \p Arg is allocated through allocateFixedSGPRInputImpl.
// NOTE(review): presumably a true \p Arg means a fixed location was assigned
// earlier -- confirm against ArgDescriptor.
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg) {
  if (!Arg) {
    Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32);
    return;
  }

  allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_32RegClass,
                             Arg.getRegister());
}
// Allocate the SGPR_64 input described by \p Arg.
//
// When \p Arg tests false, a new descriptor is obtained from
// allocateSGPR32InputImpl (called with the SGPR_64 register class) and stored
// into \p Arg. Otherwise the register already recorded in \p Arg is allocated
// through allocateFixedSGPRInputImpl.
// NOTE(review): presumably a true \p Arg means a fixed location was assigned
// earlier -- confirm against ArgDescriptor.
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg) {
  if (!Arg) {
    Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
    return;
  }

  allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_64RegClass,
                             Arg.getRegister());
}
/// Allocate implicit function VGPR arguments at the end of allocated user
@ -1919,29 +1939,29 @@ void SITargetLowering::allocateSpecialInputSGPRs(
// TODO: Unify handling with private memory pointers.
if (Info.hasDispatchPtr())
ArgInfo.DispatchPtr = allocateSGPR64Input(CCInfo);
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
if (Info.hasQueuePtr())
ArgInfo.QueuePtr = allocateSGPR64Input(CCInfo);
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
if (Info.hasImplicitArgPtr())
ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo);
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
if (Info.hasDispatchID())
ArgInfo.DispatchID = allocateSGPR64Input(CCInfo);
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
if (Info.hasWorkGroupIDX())
ArgInfo.WorkGroupIDX = allocateSGPR32Input(CCInfo);
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
if (Info.hasWorkGroupIDY())
ArgInfo.WorkGroupIDY = allocateSGPR32Input(CCInfo);
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
if (Info.hasWorkGroupIDZ())
ArgInfo.WorkGroupIDZ = allocateSGPR32Input(CCInfo);
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
// Allocate special inputs passed in user SGPRs.

View File

@ -49,6 +49,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
// have any calls.
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
CC != CallingConv::AMDGPU_Gfx &&
(!isEntryFunction() || HasCalls);
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
@ -61,6 +62,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
if (!isEntryFunction()) {
if (UseFixedABI)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
// TODO: Pick a high register, and shift down, similar to a kernel.
FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;

View File

@ -128,47 +128,23 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm
; GCN: bb.1 (%ir-block.0):
; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GCN: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN: $vgpr0 = COPY [[C]](s32)
; GCN: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
; GCN: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
; GCN: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
; GCN: $sgpr12 = COPY [[COPY15]](s32)
; GCN: $sgpr13 = COPY [[COPY16]](s32)
; GCN: $sgpr14 = COPY [[COPY17]](s32)
; GCN: $vgpr31 = COPY [[COPY18]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: G_STORE [[COPY20]](s32), [[MV]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
; GCN: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
; GCN: S_SETPC_B64_return [[COPY21]]
; GCN: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
; GCN: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; GCN: S_SETPC_B64_return [[COPY5]]
%val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42)
store volatile i32 %val, i32 addrspace(1)* %out
ret void
@ -233,44 +209,20 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
; GCN-LABEL: name: test_gfx_call_external_i1_func_void
; GCN: bb.1 (%ir-block.0):
; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: liveins: $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; GCN: $sgpr12 = COPY [[COPY13]](s32)
; GCN: $sgpr13 = COPY [[COPY14]](s32)
; GCN: $sgpr14 = COPY [[COPY15]](s32)
; GCN: $vgpr31 = COPY [[COPY16]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.0, align 16, addrspace 5)
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: G_STORE [[LOAD]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GCN: S_SETPC_B64_return [[COPY18]]
; GCN: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN: S_SETPC_B64_return [[COPY2]]
%val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
store volatile i1 %val, i1 addrspace(1)* undef
ret void
@ -452,45 +404,21 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
; GCN-LABEL: name: test_gfx_call_external_i8_func_void
; GCN: bb.1 (%ir-block.0):
; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: liveins: $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; GCN: $sgpr12 = COPY [[COPY13]](s32)
; GCN: $sgpr13 = COPY [[COPY14]](s32)
; GCN: $sgpr14 = COPY [[COPY15]](s32)
; GCN: $vgpr31 = COPY [[COPY16]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GCN: S_SETPC_B64_return [[COPY19]]
; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN: S_SETPC_B64_return [[COPY3]]
%val = call amdgpu_gfx i8 @external_gfx_i8_func_void()
store volatile i8 %val, i8 addrspace(1)* undef
ret void
@ -844,43 +772,19 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
; GCN-LABEL: name: test_gfx_call_external_i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: liveins: $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GCN: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; GCN: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; GCN: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; GCN: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; GCN: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; GCN: $sgpr12 = COPY [[COPY13]](s32)
; GCN: $sgpr13 = COPY [[COPY14]](s32)
; GCN: $sgpr14 = COPY [[COPY15]](s32)
; GCN: $vgpr31 = COPY [[COPY16]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: G_STORE [[COPY18]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GCN: S_SETPC_B64_return [[COPY19]]
; GCN: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN: S_SETPC_B64_return [[COPY3]]
%val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
@ -2562,48 +2466,24 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: liveins: $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GCN: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; GCN: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
; GCN: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
; GCN: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
; GCN: $sgpr12 = COPY [[COPY14]](s32)
; GCN: $sgpr13 = COPY [[COPY15]](s32)
; GCN: $sgpr14 = COPY [[COPY16]](s32)
; GCN: $vgpr31 = COPY [[COPY17]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
; GCN: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GCN: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: G_STORE [[MV]](s64), [[COPY9]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GCN: S_SETPC_B64_return [[COPY22]]
; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; GCN: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN: S_SETPC_B64_return [[COPY6]]
%val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void()
%val.0 = extractvalue { i32, i64 } %val, 0
%val.1 = extractvalue { i32, i64 } %val, 1

View File

@ -148,40 +148,16 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
; CHECK-LABEL: name: test_gfx_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; CHECK: $sgpr12 = COPY [[COPY13]](s32)
; CHECK: $sgpr13 = COPY [[COPY14]](s32)
; CHECK: $sgpr14 = COPY [[COPY15]](s32)
; CHECK: $vgpr31 = COPY [[COPY16]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK: S_SETPC_B64_return [[COPY18]]
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK: S_SETPC_B64_return [[COPY2]]
call amdgpu_gfx void @external_gfx_void_func_void()
ret void
}
@ -895,43 +871,19 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: $vgpr0 = COPY [[C]](s32)
; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
; CHECK: $sgpr12 = COPY [[COPY14]](s32)
; CHECK: $sgpr13 = COPY [[COPY15]](s32)
; CHECK: $sgpr14 = COPY [[COPY16]](s32)
; CHECK: $vgpr31 = COPY [[COPY17]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
; CHECK: S_SETPC_B64_return [[COPY19]]
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY3]]
call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
ret void
}
@ -939,43 +891,19 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
; CHECK: bb.1 (%ir-block.1):
; CHECK: liveins: $sgpr4, $sgpr5, $sgpr14, $sgpr15, $vgpr31, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr12_sgpr13
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: liveins: $sgpr4, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: $sgpr15 = COPY [[C]](s32)
; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
; CHECK: $sgpr12 = COPY [[COPY14]](s32)
; CHECK: $sgpr13 = COPY [[COPY15]](s32)
; CHECK: $sgpr14 = COPY [[COPY16]](s32)
; CHECK: $vgpr31 = COPY [[COPY17]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: $sgpr4 = COPY [[C]](s32)
; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
; CHECK: S_SETPC_B64_return [[COPY19]]
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY3]]
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
ret void
}
@ -3884,16 +3812,8 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1)
@ -3902,31 +3822,15 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
; CHECK: $vgpr1 = COPY [[LOAD2]](s32)
; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; CHECK: $sgpr12 = COPY [[COPY13]](s32)
; CHECK: $sgpr13 = COPY [[COPY14]](s32)
; CHECK: $sgpr14 = COPY [[COPY15]](s32)
; CHECK: $vgpr31 = COPY [[COPY16]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK: S_SETPC_B64_return [[COPY18]]
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK: S_SETPC_B64_return [[COPY2]]
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@ -3936,16 +3840,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1)
@ -3954,31 +3850,15 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
; CHECK: $sgpr15 = COPY [[ANYEXT]](s32)
; CHECK: $sgpr16 = COPY [[LOAD2]](s32)
; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
; CHECK: $sgpr12 = COPY [[COPY13]](s32)
; CHECK: $sgpr13 = COPY [[COPY14]](s32)
; CHECK: $sgpr14 = COPY [[COPY15]](s32)
; CHECK: $vgpr31 = COPY [[COPY16]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr15, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: $sgpr4 = COPY [[ANYEXT]](s32)
; CHECK: $sgpr5 = COPY [[LOAD2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK: S_SETPC_B64_return [[COPY18]]
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK: S_SETPC_B64_return [[COPY2]]
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)

View File

@ -56,42 +56,18 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
; CHECK: $sgpr12 = COPY [[COPY15]](s32)
; CHECK: $sgpr13 = COPY [[COPY16]](s32)
; CHECK: $sgpr14 = COPY [[COPY17]](s32)
; CHECK: $vgpr31 = COPY [[COPY18]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
; CHECK: S_SETPC_B64_return [[COPY20]]
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK: S_SETPC_B64_return [[COPY4]]
call amdgpu_gfx void %fptr()
ret void
}

View File

@ -2,6 +2,9 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
; Make sure this interacts well with -amdgpu-fixed-function-abi
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
declare float @extern_func(float) #0
declare float @extern_func_many_args(<64 x float>) #0