llvm-project/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll

; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,IDXMODE,GFX9 %s

; Tests dynamic indexing of vectors.

; The subtest below was moved from test/CodeGen/AMDGPU/indirect-addressing-si.ll
; to avoid gfx9 scheduling-induced issues.


; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
; GCN-DAG: s_load_dwordx16 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62

; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]

; GCN: v_cmp_eq_u32_e32
; GCN-COUNT-32: v_cndmask_b32

; GCN-COUNT-4: buffer_store_dwordx4
; Kernel that performs two insertelement operations with run-time indices on
; the same <16 x i32> vector inside a single basic block, then stores the
; result.
;
;   %out0 - addrspace(1) pointer that receives the updated vector.
;   %out1 - not referenced anywhere in the body.
;   %in   - addrspace(1) pointer to i32; the element selected by the
;           workitem's x id is loaded and used as the first insertion index.
;   %vec0 - initial vector contents, passed as a kernel argument.
define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(<16 x i32> addrspace(1)* %out0, <16 x i32> addrspace(1)* %out1, i32 addrspace(1)* %in, <16 x i32> %vec0) #0 {
entry:
  ; Load the first insertion index from in[workitem.id.x]. The load is
  ; volatile, so it cannot be removed or merged.
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  ; Second insertion index is the element right after the first one.
  %idx1 = add i32 %idx0, 1
  ; Side-effecting inline asm yields the value 62 through a "=v" output; the
  ; check lines above expect this exact v_mov_b32 to appear in the output.
  %live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()
  ; First insert writes the asm result at %idx0; the second writes the
  ; constant 63 at %idx1 into the vector produced by the first insert.
  %vec1 = insertelement <16 x i32> %vec0, i32 %live.out.val, i32 %idx0
  %vec2 = insertelement <16 x i32> %vec1, i32 63, i32 %idx1
  store volatile <16 x i32> %vec2, <16 x i32> addrspace(1)* %out0
  ; Only the workitem whose x id is 0 takes the bb1 path.
  %cmp = icmp eq i32 %id, 0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  ; Uses %live.out.val a second time, in a different block from the one that
  ; defines it. The destination pointer is undef; the store is volatile.
  store volatile i32 %live.out.val, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; Workitem id intrinsic (x dimension, per its name); called at the top of the
; kernel's entry block.
declare i32 @llvm.amdgcn.workitem.id.x() #1
; NOTE(review): no call to this barrier intrinsic is visible in this file; it
; may be a leftover from the file this subtest was moved from - confirm before
; removing.
declare void @llvm.amdgcn.s.barrier() #2

; Attribute groups: #0 is attached to the kernel definition, #1 to the
; workitem id declaration and its call site, #2 to the barrier declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind convergent }
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00			`; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN,IDXMODE,GFX9 %s`

			`; indexing of vectors.`

			`; Subtest below moved from file test/CodeGen/AMDGPU/indirect-addressing-si.ll`
			`; to avoid gfx9 scheduling induced issues.`


			`; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:`
[AMDGPU] Convert insert_vector_elt into set of selects This allows to avoid scratch use or indirect VGPR addressing for small vectors. Differential Revision: https://reviews.llvm.org/D54606 llvm-svn: 347231 2018-11-20 01:39:20 +08:00			`; GCN-DAG: s_load_dwordx16 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]{{\]}}`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00			`; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[IDX0:v[0-9]+]]`
			`; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62`

[AMDGPU] Convert insert_vector_elt into set of selects This allows to avoid scratch use or indirect VGPR addressing for small vectors. Differential Revision: https://reviews.llvm.org/D54606 llvm-svn: 347231 2018-11-20 01:39:20 +08:00			`; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]`
			`; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00
[AMDGPU] Always expand ext/insertelement with divergent idx Even though series of cmp/cndmask can produce quite a lot of code that is still better than a loop. In case of doubles we would even produce two loops. Differential Revision: https://reviews.llvm.org/D80032 2020-05-16 01:43:18 +08:00			`; GCN: v_cmp_eq_u32_e32`
			`; GCN-COUNT-32: v_cndmask_b32`
AMDGPU: Don't peel off the offset if the resulting base could possibly be negative in Indirect addressing. Summary: Don't peel off the offset if the resulting base could possibly be negative in Indirect addressing. This is because the M0 field is unsigned. This patch achieves a similar goal to https://reviews.llvm.org/D55241, but keeps the optimization if the base is known to be unsigned. Reviewers: arsenm Differential Revision: https://reviews.llvm.org/D55568 llvm-svn: 349951 2018-12-22 04:57:34 +08:00
[AMDGPU] Always expand ext/insertelement with divergent idx Even though series of cmp/cndmask can produce quite a lot of code that is still better than a loop. In case of doubles we would even produce two loops. Differential Revision: https://reviews.llvm.org/D80032 2020-05-16 01:43:18 +08:00			`; GCN-COUNT-4: buffer_store_dwordx4`
[AMDGPU] Convert insert_vector_elt into set of selects This allows to avoid scratch use or indirect VGPR addressing for small vectors. Differential Revision: https://reviews.llvm.org/D54606 llvm-svn: 347231 2018-11-20 01:39:20 +08:00			`define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(<16 x i32> addrspace(1)* %out0, <16 x i32> addrspace(1)* %out1, i32 addrspace(1)* %in, <16 x i32> %vec0) #0 {`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00			`entry:`
			`%id = call i32 @llvm.amdgcn.workitem.id.x() #1`
			`%id.ext = zext i32 %id to i64`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext`
			`%idx0 = load volatile i32, i32 addrspace(1)* %gep`
			`%idx1 = add i32 %idx0, 1`
			`%live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()`
[AMDGPU] Convert insert_vector_elt into set of selects This allows to avoid scratch use or indirect VGPR addressing for small vectors. Differential Revision: https://reviews.llvm.org/D54606 llvm-svn: 347231 2018-11-20 01:39:20 +08:00			`%vec1 = insertelement <16 x i32> %vec0, i32 %live.out.val, i32 %idx0`
			`%vec2 = insertelement <16 x i32> %vec1, i32 63, i32 %idx1`
			`store volatile <16 x i32> %vec2, <16 x i32> addrspace(1)* %out0`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00			`%cmp = icmp eq i32 %id, 0`
			`br i1 %cmp, label %bb1, label %bb2`

			`bb1:`
			`store volatile i32 %live.out.val, i32 addrspace(1)* undef`
			`br label %bb2`

			`bb2:`
			`ret void`
			`}`

			`declare i32 @llvm.amdgcn.workitem.id.x() #1`
			`declare void @llvm.amdgcn.s.barrier() #2`

			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind convergent }`