llvm-project/llvm/test/CodeGen/AMDGPU/si-lod-bias.ll

; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; This shader has the potential to generate illegal VGPR to SGPR copies if
; the wrong register class is used for the REG_SEQUENCE instructions.

; GCN-LABEL: {{^}}main:
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
; Pixel-shader entry point (amdgpu_ps calling convention). It builds a
; four-element coordinate vector from one constant-buffer value and two
; interpolated values, samples an image with
; llvm.amdgcn.image.sample.b, and exports the four result components.
;
; Arguments actually used by the body:
;   %arg  - inreg pointer (addrspace 2) to a <4 x i32> fed to llvm.SI.load.const
;   %arg1 - inreg pointer (addrspace 2) to a <4 x i32> passed as the third
;           operand of the sample call
;   %arg2 - inreg pointer (addrspace 2) to an <8 x i32> passed as the second
;           operand of the sample call
;   %arg3 - inreg i32 passed as the last operand of every interp.p1/p2 call
;   %arg5 - <2 x i32> whose two elements are bitcast to float and used as the
;           interpolation inputs
; All other arguments are unused.
define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
  ; Load the <4 x i32> behind %arg and read one float through it with the
  ; constant 16 as the second operand.
  ; NOTE(review): 16 is presumably a byte offset into a constant buffer - confirm.
  %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
  %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
  %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
  ; Load the <8 x i32> and <4 x i32> values that are later handed to the
  ; sample intrinsic.
  ; NOTE(review): presumably the image resource and sampler descriptors - confirm.
  %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
  %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
  %tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
  %tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
  ; First interpolation: both elements of %arg5 reinterpreted as floats, then
  ; interp.p1 followed by interp.p2 with the first i32 selector set to 0.
  %i.i = extractelement <2 x i32> %arg5, i32 0
  %j.i = extractelement <2 x i32> %arg5, i32 1
  %i.f.i = bitcast i32 %i.i to float
  %j.f.i = bitcast i32 %j.i to float
  %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0
  %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0
  ; Second interpolation: same inputs from %arg5, but the first i32 selector
  ; is 1 instead of 0.
  %i.i1 = extractelement <2 x i32> %arg5, i32 0
  %j.i2 = extractelement <2 x i32> %arg5, i32 1
  %i.f.i3 = bitcast i32 %i.i1 to float
  %j.f.i4 = bitcast i32 %j.i2 to float
  %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg3) #0
  %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg3) #0
  ; Assemble the sample operand as a <4 x i32>: element 0 is the constant
  ; load result, elements 1 and 2 are the two interpolated values, and
  ; element 3 is undef. The vector is then bitcast to <4 x float>.
  ; NOTE(review): presumably element 0 lives in an SGPR while elements 1-2 live
  ; in VGPRs, which would be the mix the file header comment warns about - confirm.
  %tmp28 = bitcast float %tmp21 to i32
  %tmp29 = bitcast float %p2.i to i32
  %tmp30 = bitcast float %p2.i6 to i32
  %tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
  %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
  %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
  %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
  %tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float>
  ; Sample call: operands are the assembled vector, the <8 x i32> load, the
  ; <4 x i32> load, i32 15, and five i1 false flags.
  ; NOTE(review): i32 15 presumably corresponds to the dmask:0xf expected by
  ; the check line above this function - confirm.
  %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
  ; Split the sampled <4 x float> into scalars and export all four; using every
  ; component keeps the whole sample result live.
  %tmp36 = extractelement <4 x float> %tmp35, i32 0
  %tmp37 = extractelement <4 x float> %tmp35, i32 1
  %tmp38 = extractelement <4 x float> %tmp35, i32 2
  %tmp39 = extractelement <4 x float> %tmp35, i32 3
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0
  ret void
}

; Intrinsics called by @main. The interp and load.const intrinsics use
; attribute group #1, the export uses #0, and the image sample uses #2.
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1

; Attribute groups: #0 is also applied to @main and to its interp/export calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }

; TBAA metadata attached to the three loads in @main via !tbaa !0.
; !0 is the access tag: base type !1, access type !1, offset 0.
; NOTE(review): the trailing i32 1 is presumably the "constant memory" flag -
; confirm against the LangRef.
; !1 is the "const" type node, whose parent is the root node !2.
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
!2 = !{!"tbaa root"}
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00
R600/SI: Change formatting of printed registers. Print the range of registers used with a single letter prefix. This better matches what the shader compiler produces and is overall less obnoxious than concatenating all of the subregister names together. Instead of SGPR0, it will print s0. Instead of SGPR0_SGPR1, it will print s[0:1] and so on. There doesn't appear to be a straightforward way to get the actual register info in the InstPrinter, so this parses the generated name to print with the new syntax. The required test changes are pretty nasty, and register matching regexes are now worse. Since there isn't a way to add to a variable in FileCheck, some of the tests now don't check the exact number of registers used, but I don't think that will be a real problem. llvm-svn: 194443 2013-11-12 10:35:51 +08:00			`; This shader has the potential to generated illegal VGPR to SGPR copies if`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`; the wrong register class is used for the REG_SEQUENCE instructions.`

AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`; GCN-LABEL: {{^}}main:`
			`; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf`
AMDGPU: Remove SITypeRewriter This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603 2017-06-29 05:38:50 +08:00			`define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`main_body:`
AMDGPU: Remove SITypeRewriter This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603 2017-06-29 05:38:50 +08:00			`%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0`
			`%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0`
			`%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)`
AMDGPU: Remove old sample intrinsics I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern but I don't think it really matters. llvm-svn: 258787 2016-01-26 12:38:08 +08:00			`%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0`
			`%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0`
AMDGPU: Remove SITypeRewriter This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603 2017-06-29 05:38:50 +08:00			`%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0`
			`%tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0`
AMDGPU: Remove SI_fs_constant and SI_fs_interp intrinsics Update test uses with expansion in terms of new intrinsics. llvm-svn: 295269 2017-02-16 10:01:13 +08:00			`%i.i = extractelement <2 x i32> %arg5, i32 0`
			`%j.i = extractelement <2 x i32> %arg5, i32 1`
			`%i.f.i = bitcast i32 %i.i to float`
			`%j.f.i = bitcast i32 %j.i to float`
			`%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0`
			`%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0`
			`%i.i1 = extractelement <2 x i32> %arg5, i32 0`
			`%j.i2 = extractelement <2 x i32> %arg5, i32 1`
			`%i.f.i3 = bitcast i32 %i.i1 to float`
			`%j.f.i4 = bitcast i32 %j.i2 to float`
			`%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg3) #0`
			`%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg3) #0`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp28 = bitcast float %tmp21 to i32`
AMDGPU: Remove SI_fs_constant and SI_fs_interp intrinsics Update test uses with expansion in terms of new intrinsics. llvm-svn: 295269 2017-02-16 10:01:13 +08:00			`%tmp29 = bitcast float %p2.i to i32`
			`%tmp30 = bitcast float %p2.i6 to i32`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0`
			`%tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1`
			`%tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2`
			`%tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3`
AMDGPU: Convert image intrinsic uses in tests llvm-svn: 298386 2017-03-22 00:24:12 +08:00			`%tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float>`
AMDGPU: Remove SITypeRewriter This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603 2017-06-29 05:38:50 +08:00			`%tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp36 = extractelement <4 x float> %tmp35, i32 0`
			`%tmp37 = extractelement <4 x float> %tmp35, i32 1`
			`%tmp38 = extractelement <4 x float> %tmp35, i32 2`
			`%tmp39 = extractelement <4 x float> %tmp35, i32 3`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`ret void`
			`}`

AMDGPU: Remove SI_fs_constant and SI_fs_interp intrinsics Update test uses with expansion in terms of new intrinsics. llvm-svn: 295269 2017-02-16 10:01:13 +08:00			`declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1`
			`declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1`
AMDGPU: Remove some uses of llvm.SI.export in tests Merge some of the old, smaller tests into more complete versions. llvm-svn: 295792 2017-02-22 08:02:21 +08:00			`declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0`
AMDGPU: Convert image intrinsic uses in tests llvm-svn: 298386 2017-03-22 00:24:12 +08:00			`declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2`
AMDGPU: Remove SITypeRewriter This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603 2017-06-29 05:38:50 +08:00			`declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1`
AMDGPU: Add a shader calling convention This makes it possible to distinguish between mesa shaders and other kernels even in the presence of compute shaders. Patch By: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Differential Revision: http://reviews.llvm.org/D18559 llvm-svn: 265589 2016-04-07 03:40:20 +08:00
AMDGPU: Remove SI_fs_constant and SI_fs_interp intrinsics Update test uses with expansion in terms of new intrinsics. llvm-svn: 295269 2017-02-16 10:01:13 +08:00			`attributes #0 = { nounwind }`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`attributes #1 = { nounwind readnone }`
AMDGPU: Convert image intrinsic uses in tests llvm-svn: 298386 2017-03-22 00:24:12 +08:00			`attributes #2 = { nounwind readonly }`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`!0 = !{!1, !1, i64 0, i32 1}`
[Verifier] Add verification for TBAA metadata Summary: This change adds some verification in the IR verifier around struct path TBAA metadata. Other than some basic sanity checks (e.g. we get constant integers where we expect constant integers), this checks: - That by the time an struct access tuple `(base-type, offset)` is "reduced" to a scalar base type, the offset is `0`. For instance, in C++ you can't start from, say `("struct-a", 16)`, and end up with `("int", 4)` -- by the time the base type is `"int"`, the offset better be zero. In particular, a variant of this invariant is needed for `llvm::getMostGenericTBAA` to be correct. - That there are no cycles in a struct path. - That struct type nodes have their offsets listed in an ascending order. - That when generating the struct access path, you eventually reach the access type listed in the tbaa tag node. Reviewers: dexonsmith, chandlerc, reames, mehdi_amini, manmanren Subscribers: mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D26438 llvm-svn: 289402 2016-12-12 04:07:15 +08:00			`!1 = !{!"const", !2}`
			`!2 = !{!"tbaa root"}`