[AMDGPU] Fix large return values with amdgpu_gfx

Returning in memory is not supported, so fall back to sret.
Also, extend i1 and i16 to i32. Otherwise, they would be passed through
memory.

Differential Revision: https://reviews.llvm.org/D100543
This commit is contained in:
Sebastian Neubauer 2021-04-01 10:20:05 +02:00
parent 9d57a77b81
commit 7842e1725e
3 changed files with 1293 additions and 8 deletions

View File

@ -38,6 +38,9 @@ def CC_SI_Gfx : CallingConv<[
]>; ]>;
def RetCC_SI_Gfx : CallingConv<[ def RetCC_SI_Gfx : CallingConv<[
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
// 0-3 are reserved for the stack buffer descriptor // 0-3 are reserved for the stack buffer descriptor
// 32 is reserved for the stack pointer // 32 is reserved for the stack pointer
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
@ -68,8 +71,6 @@ def RetCC_SI_Gfx : CallingConv<[
VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
]>>>, ]>>>,
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
]>; ]>;
def CC_SI_SHADER : CallingConv<[ def CC_SI_SHADER : CallingConv<[

View File

@ -216,13 +216,13 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.0, align 16, addrspace 5) ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: G_STORE [[LOAD]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) ; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
; GCN: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; GCN: S_SETPC_B64_return [[COPY2]] ; GCN: S_SETPC_B64_return [[COPY3]]
%val = call amdgpu_gfx i1 @external_gfx_i1_func_void() %val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
store volatile i1 %val, i1 addrspace(1)* undef store volatile i1 %val, i1 addrspace(1)* undef
ret void ret void

File diff suppressed because it is too large Load Diff