forked from OSchip/llvm-project
[AMDGPU] Fix large return values with amdgpu_gfx
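Returning values in memory is not supported for amdgpu_gfx functions, so fall back to sret for return values that are too large to be returned in registers. Also, extend i1 and i16 return values to i32; otherwise, they would be passed through memory.

Differential Revision: https://reviews.llvm.org/D100543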
This commit is contained in:
parent
9d57a77b81
commit
7842e1725e
|
@ -38,6 +38,9 @@ def CC_SI_Gfx : CallingConv<[
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def RetCC_SI_Gfx : CallingConv<[
|
def RetCC_SI_Gfx : CallingConv<[
|
||||||
|
CCIfType<[i1], CCPromoteToType<i32>>,
|
||||||
|
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
|
||||||
|
|
||||||
// 0-3 are reserved for the stack buffer descriptor
|
// 0-3 are reserved for the stack buffer descriptor
|
||||||
// 32 is reserved for the stack pointer
|
// 32 is reserved for the stack pointer
|
||||||
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
|
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
|
||||||
|
@ -68,8 +71,6 @@ def RetCC_SI_Gfx : CallingConv<[
|
||||||
VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
|
VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
|
||||||
VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
|
VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
|
||||||
]>>>,
|
]>>>,
|
||||||
|
|
||||||
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
|
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def CC_SI_SHADER : CallingConv<[
|
def CC_SI_SHADER : CallingConv<[
|
||||||
|
|
|
@ -216,13 +216,13 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
|
||||||
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
|
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
|
||||||
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
|
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
|
||||||
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
|
||||||
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
|
; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||||
; GCN: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.0, align 16, addrspace 5)
|
; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
|
||||||
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
|
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
|
||||||
; GCN: G_STORE [[LOAD]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
|
; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
|
||||||
; GCN: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||||
; GCN: S_SETPC_B64_return [[COPY2]]
|
; GCN: S_SETPC_B64_return [[COPY3]]
|
||||||
%val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
|
%val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
|
||||||
store volatile i1 %val, i1 addrspace(1)* undef
|
store volatile i1 %val, i1 addrspace(1)* undef
|
||||||
ret void
|
ret void
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue