AMDGPU/GlobalISel: Insert readfirstlane on SGPR returns
In case the source value ends up in a VGPR, insert a readfirstlane to avoid producing an illegal copy later. If it turns out to be unnecessary, it can be folded out.
parent a314050065
commit 67cfbec746
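For illustration, this is roughly the MIR the IRTranslator now emits for a trivial scalar return from an amdgpu_ps function (a sketch distilled from the sgpr_return_i32 test below; the named virtual registers are illustrative, the generated checks use numbered ones). The value arriving in $vgpr0 is routed through llvm.amdgcn.readfirstlane before the copy into the SGPR return register; if the value is already uniform, the readfirstlane is redundant and can be folded out later:

  ; i32 return from an amdgpu_ps function: the source is forced uniform
  ; with readfirstlane before it reaches the SGPR return register.
  %src:_(s32) = COPY $vgpr0
  %uniform:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %src(s32)
  $sgpr0 = COPY %uniform(s32)
  SI_RETURN_TO_EPILOG implicit $sgpr0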
@@ -59,6 +59,18 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
     } else
       ExtReg = extendRegister(ValVReg, VA);
 
+    // If this is a scalar return, insert a readfirstlane just in case the value
+    // ends up in a VGPR.
+    // FIXME: Assert this is a shader return.
+    const SIRegisterInfo *TRI
+      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+    if (TRI->isSGPRReg(MRI, PhysReg)) {
+      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
+                                              {MRI.getType(ExtReg)}, false)
+        .addReg(ExtReg);
+      ExtReg = ToSGPR.getReg(0);
+    }
+
     MIRBuilder.buildCopy(PhysReg, ExtReg);
     MIB.addUse(PhysReg, RegState::Implicit);
   }
@@ -29,8 +29,7 @@ define amdgpu_ps i32 @s_bswap_i32(i32 inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call i32 @llvm.bswap.i32(i32 %src)
-  %to.sgpr = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap)
-  ret i32 %to.sgpr
+  ret i32 %bswap
 }
 
 define i32 @v_bswap_i32(i32 %src) {
@@ -96,13 +95,7 @@ define amdgpu_ps <2 x i32> @s_bswap_v2i32(<2 x i32> inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %src)
-  %bswap.0 = extractelement <2 x i32> %bswap, i32 0
-  %bswap.1 = extractelement <2 x i32> %bswap, i32 1
-  %to.sgpr0 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.0)
-  %to.sgpr1 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.1)
-  %ins.0 = insertelement <2 x i32> undef, i32 %to.sgpr0, i32 0
-  %ins.1 = insertelement <2 x i32> %ins.0, i32 %to.sgpr1, i32 1
-  ret <2 x i32> %ins.1
+  ret <2 x i32> %bswap
 }
 
 define <2 x i32> @v_bswap_v2i32(<2 x i32> %src) {
@@ -137,7 +130,7 @@ define <2 x i32> @v_bswap_v2i32(<2 x i32> %src) {
   ret <2 x i32> %bswap
 }
 
-define amdgpu_ps <2 x i32> @s_bswap_i64(i64 inreg %src) {
+define amdgpu_ps i64 @s_bswap_i64(i64 inreg %src) {
 ; GFX7-LABEL: s_bswap_i64:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: v_alignbit_b32 v0, s1, s1, 8
@@ -173,14 +166,7 @@ define amdgpu_ps <2 x i32> @s_bswap_i64(i64 inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call i64 @llvm.bswap.i64(i64 %src)
-  %cast = bitcast i64 %bswap to <2 x i32>
-  %elt0 = extractelement <2 x i32> %cast, i32 0
-  %elt1 = extractelement <2 x i32> %cast, i32 1
-  %to.sgpr0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt0)
-  %to.sgpr1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt1)
-  %ins.0 = insertelement <2 x i32> undef, i32 %to.sgpr0, i32 0
-  %ins.1 = insertelement <2 x i32> %ins.0, i32 %to.sgpr1, i32 1
-  ret <2 x i32> %ins.1
+  ret i64 %bswap
 }
 
 define i64 @v_bswap_i64(i64 %src) {
@@ -218,7 +204,7 @@ define i64 @v_bswap_i64(i64 %src) {
   ret i64 %bswap
 }
 
-define amdgpu_ps <4 x i32> @s_bswap_v2i64(<2 x i64> inreg %src) {
+define amdgpu_ps <2 x i64> @s_bswap_v2i64(<2 x i64> inreg %src) {
 ; GFX7-LABEL: s_bswap_v2i64:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: v_alignbit_b32 v0, s1, s1, 8
@@ -274,20 +260,7 @@ define amdgpu_ps <4 x i32> @s_bswap_v2i64(<2 x i64> inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s3, v3
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %src)
-  %cast = bitcast <2 x i64> %bswap to <4 x i32>
-  %bswap.0 = extractelement <4 x i32> %cast, i32 0
-  %bswap.1 = extractelement <4 x i32> %cast, i32 1
-  %bswap.2 = extractelement <4 x i32> %cast, i32 2
-  %bswap.3 = extractelement <4 x i32> %cast, i32 3
-  %to.sgpr0 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.0)
-  %to.sgpr1 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.1)
-  %to.sgpr2 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.2)
-  %to.sgpr3 = call i32 @llvm.amdgcn.readfirstlane(i32 %bswap.3)
-  %ins.0 = insertelement <4 x i32> undef, i32 %to.sgpr0, i32 0
-  %ins.1 = insertelement <4 x i32> %ins.0, i32 %to.sgpr1, i32 1
-  %ins.2 = insertelement <4 x i32> %ins.1, i32 %to.sgpr2, i32 2
-  %ins.3 = insertelement <4 x i32> %ins.2, i32 %to.sgpr3, i32 3
-  ret <4 x i32> %ins.3
+  ret <2 x i64> %bswap
 }
 
 define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) {
@@ -345,7 +318,6 @@ define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) {
 ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
 ; GFX7-NEXT: s_lshr_b32 s0, s0, 8
 ; GFX7-NEXT: s_or_b32 s0, s0, s1
-; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
 ; GFX7-NEXT: ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_bswap_i16:
@@ -364,10 +336,7 @@ define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call i16 @llvm.bswap.i16(i16 %src)
-  %zext = zext i16 %bswap to i32
-  %to.sgpr = call i32 @llvm.amdgcn.readfirstlane(i32 %zext)
-  %trunc = trunc i32 %to.sgpr to i16
-  ret i16 %trunc
+  ret i16 %bswap
 }
 
 define i16 @v_bswap_i16(i16 %src) {
@@ -431,9 +400,8 @@ define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
 ; GFX9-NEXT: ; return to shader part epilog
   %bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
-  %cast0 = bitcast <2 x i16> %bswap to i32
-  %to.sgpr = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0)
-  ret i32 %to.sgpr
+  %cast = bitcast <2 x i16> %bswap to i32
+  ret i32 %cast
 }
 
 define i32 @v_bswap_i16_zext_to_i32(i16 %src) {
@@ -574,7 +542,6 @@ define i64 @v_bswap_i48(i64 %src) {
   ret i64 %zext
 }
 
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
 declare i16 @llvm.bswap.i16(i16) #1
 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
 declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>) #1
@@ -31,6 +31,77 @@ main_body:
   ret void
 }
 
+define amdgpu_ps float @vgpr_return(i32 %vgpr) {
+; CHECK-LABEL: name: vgpr_return
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: $vgpr0 = COPY [[COPY]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+  %cast = bitcast i32 %vgpr to float
+  ret float %cast
+}
+
+define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
+; CHECK-LABEL: name: sgpr_return_i32
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+  ret i32 %vgpr
+}
+
+define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
+; CHECK-LABEL: name: sgpr_return_i64
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0, $vgpr1
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ret i64 %vgpr
+}
+
+define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
+; CHECK-LABEL: name: sgpr_return_v2i32
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0, $vgpr1
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ret <2 x i32> %vgpr
+}
+
+define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1) {
+; CHECK-LABEL: name: sgpr_struct_return_i32_i32
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0, $vgpr1
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
+  %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
+  ret { i32, i32 } %value
+}
+
 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
 
 attributes #0 = { nounwind }
@@ -1,73 +1,99 @@
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator %s -o - | FileCheck %s
 
-; CHECK-LABEL: name: test_f32_inreg
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S0]]
 define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
+; CHECK-LABEL: name: test_f32_inreg
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $sgpr2
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+; CHECK: S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
 }
 
-; CHECK-LABEL: name: test_f32
-; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]]
 define amdgpu_vs void @test_f32(float %arg0) {
+; CHECK-LABEL: name: test_f32
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $vgpr0
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+; CHECK: S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
 }
 
-; CHECK-LABEL: name: test_ptr2_inreg
-; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3
-; CHECK: [[PTR:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S2]](s32), [[S3]](s32)
-; CHECK: G_LOAD [[PTR]]
 define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
+; CHECK-LABEL: name: test_ptr2_inreg
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $sgpr2, $sgpr3
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load 4 from %ir.arg0, addrspace 4)
+; CHECK: S_ENDPGM 0
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
   ret void
 }
 
-; CHECK-LABEL: name: test_sgpr_alignment0
-; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3
-; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4
-; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32)
-; CHECK: G_LOAD [[S34]]
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S2]](s32)
 define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
+; CHECK-LABEL: name: test_sgpr_alignment0
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+; CHECK: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load 4 from %ir.arg1, addrspace 4)
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+; CHECK: S_ENDPGM 0
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg1
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
 }
 
-; CHECK-LABEL: name: test_order
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
-; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
-; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
 define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) {
+; CHECK-LABEL: name: test_order
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0
+; CHECK: S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
   ret void
 }
 
-; CHECK-LABEL: name: ret_struct
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
-; CHECK: $sgpr0 = COPY [[S0]]
-; CHECK: $sgpr1 = COPY [[S1]]
-; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
 define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {
+; CHECK-LABEL: name: ret_struct
+; CHECK: bb.1.main_body:
+; CHECK: liveins: $sgpr2, $sgpr3
+; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
 main_body:
   %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
   %tmp1 = insertvalue <{ i32, i32 }> %tmp0, i32 %arg1, 1
   ret <{ i32, i32 }> %tmp1
 }
 
-; CHECK_LABEL: name: non_void_ret
-; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: $sgpr0 = COPY [[ZERO]]
-; SI_RETURN_TO_EPILOG $sgpr0
 define amdgpu_vs i32 @non_void_ret() {
+; CHECK-LABEL: name: non_void_ret
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
   ret i32 0
 }
 
File diff suppressed because it is too large
@@ -14,7 +14,9 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
 ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4)
-; CHECK: $sgpr0 = COPY [[AMDGPU_S_BUFFER_LOAD]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32)
+; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
   %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
   ret i32 %val
@@ -32,8 +34,12 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4)
 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>)
-; CHECK: $sgpr0 = COPY [[UV]](s32)
-; CHECK: $sgpr1 = COPY [[UV1]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
+; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
   ret <2 x i32> %val
@@ -52,9 +58,15 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4)
 ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0
 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s32>)
-; CHECK: $sgpr0 = COPY [[UV]](s32)
-; CHECK: $sgpr1 = COPY [[UV1]](s32)
-; CHECK: $sgpr2 = COPY [[UV2]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
+; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
+; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+; CHECK: $sgpr2 = COPY [[INT2]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
   %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
   ret <3 x i32> %val
@@ -72,14 +84,30 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4)
 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
-; CHECK: $sgpr0 = COPY [[UV]](s32)
-; CHECK: $sgpr1 = COPY [[UV1]](s32)
-; CHECK: $sgpr2 = COPY [[UV2]](s32)
-; CHECK: $sgpr3 = COPY [[UV3]](s32)
-; CHECK: $sgpr4 = COPY [[UV4]](s32)
-; CHECK: $sgpr5 = COPY [[UV5]](s32)
-; CHECK: $sgpr6 = COPY [[UV6]](s32)
-; CHECK: $sgpr7 = COPY [[UV7]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
+; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
+; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+; CHECK: $sgpr2 = COPY [[INT2]](s32)
+; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
+; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
+; CHECK: $sgpr3 = COPY [[INT3]](s32)
+; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
+; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+; CHECK: $sgpr4 = COPY [[INT4]](s32)
+; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
+; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
+; CHECK: $sgpr5 = COPY [[INT5]](s32)
+; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
+; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
+; CHECK: $sgpr6 = COPY [[INT6]](s32)
+; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
+; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
+; CHECK: $sgpr7 = COPY [[INT7]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
   %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
   ret <8 x i32> %val
@@ -97,22 +125,54 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4)
 ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>)
-; CHECK: $sgpr0 = COPY [[UV]](s32)
-; CHECK: $sgpr1 = COPY [[UV1]](s32)
-; CHECK: $sgpr2 = COPY [[UV2]](s32)
-; CHECK: $sgpr3 = COPY [[UV3]](s32)
-; CHECK: $sgpr4 = COPY [[UV4]](s32)
-; CHECK: $sgpr5 = COPY [[UV5]](s32)
-; CHECK: $sgpr6 = COPY [[UV6]](s32)
-; CHECK: $sgpr7 = COPY [[UV7]](s32)
-; CHECK: $sgpr8 = COPY [[UV8]](s32)
-; CHECK: $sgpr9 = COPY [[UV9]](s32)
-; CHECK: $sgpr10 = COPY [[UV10]](s32)
-; CHECK: $sgpr11 = COPY [[UV11]](s32)
-; CHECK: $sgpr12 = COPY [[UV12]](s32)
-; CHECK: $sgpr13 = COPY [[UV13]](s32)
-; CHECK: $sgpr14 = COPY [[UV14]](s32)
-; CHECK: $sgpr15 = COPY [[UV15]](s32)
+; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
+; CHECK: $sgpr0 = COPY [[INT]](s32)
+; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
+; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
+; CHECK: $sgpr1 = COPY [[INT1]](s32)
+; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
+; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
+; CHECK: $sgpr2 = COPY [[INT2]](s32)
+; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
+; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
+; CHECK: $sgpr3 = COPY [[INT3]](s32)
+; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
+; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+; CHECK: $sgpr4 = COPY [[INT4]](s32)
+; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
+; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
+; CHECK: $sgpr5 = COPY [[INT5]](s32)
+; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
+; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
+; CHECK: $sgpr6 = COPY [[INT6]](s32)
+; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
+; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
+; CHECK: $sgpr7 = COPY [[INT7]](s32)
+; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32)
+; CHECK: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
+; CHECK: $sgpr8 = COPY [[INT8]](s32)
+; CHECK: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32)
+; CHECK: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
+; CHECK: $sgpr9 = COPY [[INT9]](s32)
+; CHECK: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32)
+; CHECK: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
+; CHECK: $sgpr10 = COPY [[INT10]](s32)
+; CHECK: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32)
+; CHECK: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
+; CHECK: $sgpr11 = COPY [[INT11]](s32)
+; CHECK: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32)
+; CHECK: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
+; CHECK: $sgpr12 = COPY [[INT12]](s32)
+; CHECK: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32)
+; CHECK: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
+; CHECK: $sgpr13 = COPY [[INT13]](s32)
+; CHECK: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32)
+; CHECK: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
+; CHECK: $sgpr14 = COPY [[INT14]](s32)
+; CHECK: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32)
+; CHECK: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
+; CHECK: $sgpr15 = COPY [[INT15]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
   %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
   ret <16 x i32> %val