llvm-project/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll

; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
declare void @llvm.amdgcn.s.barrier() #2

; The required pointer calculations for the alloca'd actually requires
; an add and won't be folded into the addressing, which fails with a
; 64-bit pointer add. This should work since private pointers should
; be 32-bits.

; SI-LABEL: {{^}}test_private_array_ptr_calc:

; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64
; SI-ALLOCA: s_barrier
; SI-ALLOCA: buffer_load_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector.  It currently fails because it does not know how
; to interpret:
; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b

; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
  %alloca = alloca [16 x i32], align 16
  %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
  %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
  %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
  %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
  %a = load i32, i32 addrspace(1)* %a_ptr, !range !0
  %b = load i32, i32 addrspace(1)* %b_ptr, !range !0
  %result = add i32 %a, %b
  %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b
  store i32 %result, i32* %alloca_ptr, align 4
  ; Dummy call
  call void @llvm.amdgcn.s.barrier()
  %reload = load i32, i32* %alloca_ptr, align 4, !range !0
  %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
  ret void
}

attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind convergent }

!0 = !{i32 0, i32 65536 }
AMDGPU: Cleanup subtarget features Try to avoid mutually exclusive features. Don't use a real default GPU, and use a fake "generic". The goal is to make it easier to see which set of features are incompatible between feature strings. Most of the test changes are due to random scheduling changes from not having a default fullspeed model. llvm-svn: 310258 2017-08-07 22:58:04 +08:00			`; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s \| FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s`
			`; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s \| FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1`
			`declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1`
			`declare void @llvm.amdgcn.s.barrier() #2`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00
			`; The required pointer calculations for the alloca'd actually requires`
			`; an add and won't be folded into the addressing, which fails with a`
			`; 64-bit pointer add. This should work since private pointers should`
			`; be 32-bits.`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; SI-LABEL: {{^}}test_private_array_ptr_calc:`
R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895 2014-07-13 10:46:17 +08:00
AMDGPU: Always allocate emergency stack slot at offset 0 This allows us to ensure that 0 is never a valid pointer to a user object, and ensures that the offset is always legal without needing a register to access it. This comes at the cost of usable offsets and wasted stack space. llvm-svn: 295877 2017-02-23 05:05:25 +08:00			`; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16, v{{[0-9]+}}`
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64`
AMDGPU: Enable LocalStackSlotAllocation pass This resolves more frame indexes early and folds the immediate offsets into the scratch mubuf instructions. This cleans up a lot of the mess that's currently emitted, such as emitting add 0s and repeatedly initializing the same register to 0 when spilling. llvm-svn: 266508 2016-04-16 10:13:37 +08:00			`; SI-ALLOCA: s_barrier`
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`; SI-ALLOCA: buffer_load_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`;`
			`; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this`
			`; alloca to a vector. It currently fails because it does not know how`
			`; to interpret:`
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b`
R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895 2014-07-13 10:46:17 +08:00
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI-PROMOTE: ds_write_b32 [[PTRREG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {`
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`%alloca = alloca [16 x i32], align 16`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);`
			`%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`%a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid`
			`%b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid`
AMDGPU: Don't use MUBUF vaddr if address may overflow Effectively revert r263964. Before we would not allow this if vaddr was not known to be positive. llvm-svn: 318240 2017-11-15 08:45:43 +08:00			`%a = load i32, i32 addrspace(1)* %a_ptr, !range !0`
			`%b = load i32, i32 addrspace(1)* %b_ptr, !range !0`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`%result = add i32 %a, %b`
AMDGPU: Fix mishandling array allocations when promoting alloca The canonical form for allocas is a single allocation of the array type. In case we see a non-canonical array alloca, make sure we aren't replacing this with an array N times smaller. llvm-svn: 267916 2016-04-29 02:38:48 +08:00			`%alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`store i32 %result, i32* %alloca_ptr, align 4`
			`; Dummy call`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`call void @llvm.amdgcn.s.barrier()`
AMDGPU: Don't use MUBUF vaddr if address may overflow Effectively revert r263964. Before we would not allow this if vaddr was not known to be positive. llvm-svn: 318240 2017-11-15 08:45:43 +08:00			`%reload = load i32, i32* %alloca_ptr, align 4, !range !0`
AMDGPU: Do not promote allocas with non-inbounds GEPs If we can't assume the pointer value isn't within the bounds of the object, it seems risky to try to replace the pointer calculations. llvm-svn: 259573 2016-02-03 05:16:12 +08:00			`%out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid`
R600/SI: Make private pointers be 32-bit. Different sized address spaces should theoretically work most of the time now, and since 64-bit add is currently disabled, using more 32-bit pointers fixes some cases. llvm-svn: 197659 2013-12-19 13:32:55 +08:00			`store i32 %reload, i32 addrspace(1)* %out_ptr, align 4`
			`ret void`
			`}`

[AMDGPU] Wave and register controls - Implemented amdgpu-flat-work-group-size attribute - Implemented amdgpu-num-active-waves-per-eu attribute - Implemented amdgpu-num-sgpr attribute - Implemented amdgpu-num-vgpr attribute - Dynamic LDS constraints are in a separate patch Patch by Tom Stellard and Konstantin Zhuravlyov Differential Revision: https://reviews.llvm.org/D21562 llvm-svn: 280747 2016-09-07 04:22:28 +08:00			`attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind convergent }`
AMDGPU: Don't use MUBUF vaddr if address may overflow Effectively revert r263964. Before we would not allow this if vaddr was not known to be positive. llvm-svn: 318240 2017-11-15 08:45:43 +08:00
			`!0 = !{i32 0, i32 65536 }`