llvm-project/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll

; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s

; Intrinsic returning an i32 that the test uses as a per-invocation index
; into the global input/output buffers (presumably the work-item id -- TODO
; confirm against the intrinsic definition).
declare i32 @llvm.SI.tid() nounwind readnone
; Barrier intrinsic, declared convergent.  The test only uses it as a dummy
; call placed between the store to the private array and the reload.
declare void @llvm.AMDGPU.barrier.local() nounwind convergent

; The pointer calculation required for the alloca'd array actually needs
; an add that won't be folded into the addressing, which fails with a
; 64-bit pointer add. This should work since private pointers should
; be 32 bits.

; SI-LABEL: {{^}}test_private_array_ptr_calc:

; FIXME: We end up with a zero argument for the ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset.  We should fold this into the store.
; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector.  It currently fails because it does not know how
; to interpret:
; getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b

; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
; Test kernel: loads one i32 from %inA and one from %inB (both indexed by
; %tid), stores their sum into a private alloca at a dynamically computed
; element, reloads that element after a barrier call, and writes the
; reloaded value to %out[%tid].
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
  ; Allocates four [4 x i32] arrays (the "i32 4" is the element count of the
  ; alloca, not an array dimension), 16-byte aligned.
  %alloca = alloca [4 x i32], i32 4, align 16
  %tid = call i32 @llvm.SI.tid() readnone
  %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
  %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
  %a = load i32, i32 addrspace(1)* %a_ptr
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  ; The leading index 1 steps past the first [4 x i32] (16 bytes), and %b then
  ; selects an element dynamically.  This is the pointer calculation the test
  ; is about; the constant 16 in the promote-alloca check above presumably
  ; comes from that first index.
  %alloca_ptr = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
  store i32 %result, i32* %alloca_ptr, align 4
  ; Dummy call separating the store from the reload (presumably so the
  ; store/reload pair is not trivially forwarded -- TODO confirm).
  call void @llvm.AMDGPU.barrier.local() nounwind convergent
  %reload = load i32, i32* %alloca_ptr, align 4
  %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
  ret void
}
R600/SI: Add a stub GCNTargetMachine This is equivalent to the AMDGPUTargetMachine now, but it is the starting point for separating R600 and GCN functionality into separate targets. It is recommended that users start using the gcn triple for GCN-based GPUs, because using the r600 triple for these GPUs will be deprecated in the future. llvm-svn: 225277 2015-01-07 02:00:21 +08:00			`; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s \| FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s`
			`; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s \| FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00
			`declare i32 @llvm.SI.tid() nounwind readnone`
AMDGPU: Switch barrier intrinsics to using convergent noduplicate prevents unrolling of small loops that happen to have barriers in them. If a loop has a barrier in it, it is OK to duplicate it for the unroll. llvm-svn: 256075 2015-12-19 09:46:41 +08:00			`declare void @llvm.AMDGPU.barrier.local() nounwind convergent`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00
			`; The required pointer calculations for the alloca'd actually requires`
			`; an add and won't be folded into the addressing, which fails with a`
			`; 64-bit pointer add. This should work since private pointers should`
			`; be 32-bits.`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; SI-LABEL: {{^}}test_private_array_ptr_calc:`
R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895 2014-07-13 10:46:17 +08:00
R600/SI: Use scratch memory for large private arrays llvm-svn: 213551 2014-07-21 23:45:01 +08:00			`; FIXME: We end up with zero argument for ADD, because`
			`; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index`
			`; with the appropriate offset. We should fold this into the store.`
AMDGPU: Add sdst operand to VOP2b instructions The VOP3 encoding of these allows any SGPR pair for the i1 output, but this was forced before to always use vcc. This doesn't yet try to use this, but does add the operand to the definitions so the main change is adding vcc to the output of the VOP2 encoding. llvm-svn: 246358 2015-08-29 15:16:50 +08:00			`; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`;`
			`; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this`
			`; alloca to a vector. It currently fails because it does not know how`
			`; to interpret:`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`; getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b`
R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895 2014-07-13 10:46:17 +08:00
AMDGPU: Add sdst operand to VOP2b instructions The VOP3 encoding of these allows any SGPR pair for the i1 output, but this was forced before to always use vcc. This doesn't yet try to use this, but does add the operand to the definitions so the main change is adding vcc to the output of the VOP2 encoding. llvm-svn: 246358 2015-08-29 15:16:50 +08:00			`; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI-PROMOTE: ds_write_b32 [[PTRREG]]`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {`
			`%alloca = alloca [4 x i32], i32 4, align 16`
			`%tid = call i32 @llvm.SI.tid() readnone`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`%a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid`
			`%b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load i32, i32 addrspace(1)* %a_ptr`
			`%b = load i32, i32 addrspace(1)* %b_ptr`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`%result = add i32 %a, %b`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`%alloca_ptr = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`store i32 %result, i32* %alloca_ptr, align 4`
			`; Dummy call`
AMDGPU: Switch barrier intrinsics to using convergent noduplicate prevents unrolling of small loops that happen to have barriers in them. If a loop has a barrier in it, it is OK to duplicate it for the unroll. llvm-svn: 256075 2015-12-19 09:46:41 +08:00			`call void @llvm.AMDGPU.barrier.local() nounwind convergent`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%reload = load i32, i32* %alloca_ptr, align 4`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`%out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`store i32 %reload, i32 addrspace(1)* %out_ptr, align 4`
			`ret void`
			`}`