llvm-project/llvm/test/CodeGen/AMDGPU/si-lod-bias.ll

;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; This shader has the potential to generate illegal VGPR to SGPR copies if
; the wrong register class is used for the REG_SEQUENCE instructions.

; CHECK: {{^}}main:
; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
; Test shader: builds a <4 x i32> address vector whose lanes come from
; different producers (one llvm.SI.load.const result, two llvm.SI.fs.interp
; results, one undef lane) and passes it to llvm.SI.image.sample.b.v4i32.
; The four sampled components are then handed to llvm.SI.export.
;
; Only %arg, %arg1, %arg2, %arg3 and %arg5 are read by the body; the
; remaining parameters are unused here.
; NOTE(review): the unused parameters presumably mirror a driver-defined
; shader input layout -- confirm before trimming them.
define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
  ; Load the <16 x i8> value behind %arg (the GEP index is 0, so the address
  ; is unchanged) and feed it to llvm.SI.load.const together with i32 16.
  ; NOTE(review): the i32 16 is presumably a byte offset into a constant
  ; buffer -- confirm against the intrinsic definition.
  %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
  %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
  %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
  ; Load the <8 x i32> value behind %arg2; it becomes the second operand of
  ; the sample call below.
  %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
  %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
  ; Load the <16 x i8> value behind %arg1; it is bitcast to <4 x i32> further
  ; down and becomes the third operand of the sample call.
  %tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
  %tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
  ; Two llvm.SI.fs.interp calls that differ only in their first immediate
  ; (0 and 1); both take %arg3 and %arg5.
  %tmp26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
  %tmp27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg3, <2 x i32> %arg5)
  ; Reinterpret the three floats as i32 so they can be packed into a
  ; <4 x i32> vector.
  %tmp28 = bitcast float %tmp21 to i32
  %tmp29 = bitcast float %tmp26 to i32
  %tmp30 = bitcast float %tmp27 to i32
  ; Assemble the vector: lane 0 = load.const result, lanes 1 and 2 = interp
  ; results, lane 3 = undef.
  ; NOTE(review): mixing these producers in one vector is presumably what
  ; exercises the REG_SEQUENCE register-class selection described in the file
  ; header -- confirm against the original commit.
  %tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
  %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
  %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
  %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
  %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
  ; The sample call under test. Its eight trailing immediates are 15 followed
  ; by seven zeros, the same sequence the expected image_sample_b line above
  ; matches in the generated assembly.
  %tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ; Split the <4 x float> result into its four components and pass them as
  ; the float operands of llvm.SI.export.
  %tmp36 = extractelement <4 x float> %tmp35, i32 0
  %tmp37 = extractelement <4 x float> %tmp35, i32 1
  %tmp38 = extractelement <4 x float> %tmp35, i32 2
  %tmp39 = extractelement <4 x float> %tmp35, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)
  ret void
}

; Intrinsic declarations used by @main, followed by the attribute groups they
; and @main refer to.

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
; Takes a <4 x i32>, an <8 x i32> and a <4 x i32> followed by eight i32
; operands, and returns <4 x float>.
declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1


; Declared without an attribute group, unlike the three intrinsics above.
; Takes five i32 operands followed by four floats.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; #0 is attached to @main.
; NOTE(review): "ShaderType"="0" presumably selects the pixel/fragment shader
; calling convention -- confirm against the AMDGPU backend of this revision.
attributes #0 = { "ShaderType"="0" }
; #1 is shared by the load.const, fs.interp and image.sample.b declarations.
attributes #1 = { nounwind readnone }

; Metadata attached via !tbaa to the three loads in @main.
; NOTE(review): !0 looks like a TBAA access tag (base type !1, access type !1,
; offset 0, trailing i32 1 presumably the "constant" flag) and !1 a type node
; named "const" with a null parent -- confirm against the LLVM LangRef.
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", null}
R600/SI: Add a stub GCNTargetMachine This is equivalent to the AMDGPUTargetMachine now, but it is the starting point for separating R600 and GCN functionality into separate targets. It is recommened that users start using the gcn triple for GCN-based GPUs, because using the r600 triple for these GPUs will be deprecated in the future. llvm-svn: 225277 2015-01-07 02:00:21 +08:00			`;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs \| FileCheck %s`
R600/SI: Enable all tests that pass on VI without changes llvm-svn: 227214 2015-01-28 01:27:15 +08:00			`;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck %s`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00
R600/SI: Change formatting of printed registers. Print the range of registers used with a single letter prefix. This better matches what the shader compiler produces and is overall less obnoxious than concatenating all of the subregister names together. Instead of SGPR0, it will print s0. Instead of SGPR0_SGPR1, it will print s[0:1] and so on. There doesn't appear to be a straightforward way to get the actual register info in the InstPrinter, so this parses the generated name to print with the new syntax. The required test changes are pretty nasty, and register matching regexes are now worse. Since there isn't a way to add to a variable in FileCheck, some of the tests now don't check the exact number of registers used, but I don't think that will be a real problem. llvm-svn: 194443 2013-11-12 10:35:51 +08:00			`; This shader has the potential to generated illegal VGPR to SGPR copies if`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`; the wrong register class is used for the REG_SEQUENCE instructions.`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK: {{^}}main:`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}`
AMDGPU: Remove old sample intrinsics I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern but I don't think it really matters. llvm-svn: 258787 2016-01-26 12:38:08 +08:00			`define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`main_body:`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0`
			`%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0`
			`%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)`
AMDGPU: Remove old sample intrinsics I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern but I don't think it really matters. llvm-svn: 258787 2016-01-26 12:38:08 +08:00			`%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0`
			`%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0`
			`%tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0`
			`%tmp26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)`
			`%tmp27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg3, <2 x i32> %arg5)`
			`%tmp28 = bitcast float %tmp21 to i32`
			`%tmp29 = bitcast float %tmp26 to i32`
			`%tmp30 = bitcast float %tmp27 to i32`
			`%tmp31 = insertelement <4 x i32> undef, i32 %tmp28, i32 0`
			`%tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1`
			`%tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2`
			`%tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3`
AMDGPU: Remove old sample intrinsics I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern but I don't think it really matters. llvm-svn: 258787 2016-01-26 12:38:08 +08:00			`%tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>`
			`%tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)`
AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`%tmp36 = extractelement <4 x float> %tmp35, i32 0`
			`%tmp37 = extractelement <4 x float> %tmp35, i32 1`
			`%tmp38 = extractelement <4 x float> %tmp35, i32 2`
			`%tmp39 = extractelement <4 x float> %tmp35, i32 3`
			`call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)`
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00			`ret void`
			`}`

			`; Function Attrs: nounwind readnone`
			`declare float @llvm.SI.load.const(<16 x i8>, i32) #1`

			`; Function Attrs: nounwind readnone`
			`declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1`

AMDGPU: Remove old sample intrinsics I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern but I don't think it really matters. llvm-svn: 258787 2016-01-26 12:38:08 +08:00			`declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1`

R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. llvm-svn: 188427 2013-08-15 07:24:32 +08:00
			`declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)`

			`attributes #0 = { "ShaderType"="0" }`
			`attributes #1 = { nounwind readnone }`

AMDGPU: Run instnamer on a few tests This will make future test updates easier llvm-svn: 258613 2016-01-23 13:42:43 +08:00			`!0 = !{!1, !1, i64 0, i32 1}`
			`!1 = !{!"const", null}`