llvm-project/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.sub...

; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:

; GCN: {{flat|global}}_load_dword [[IDX:v[0-9]+]]
; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]]

; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0x3e7

; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]

; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]
; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]

; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; Kernel under test. Each workitem loads one i32 index and one <2 x i16>
; vector from global memory, inserts the constant 999 (0x3e7) at the position
; selected by that loaded index, and stores the resulting vector. The check
; lines above expect the insert to be lowered to a 0xffff mask shifted by
; (index << 4) and merged with v_bfi_b32.
define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, i32 addrspace(1)* %idx.ptr) #0 {
  ; Workitem id in x, sign-extended so it can be used as an i64 GEP offset.
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #1
  %lane.i64 = sext i32 %lane to i64

  ; Per-workitem addresses of the source vector, the index, and the result.
  %src.addr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %lane.i64
  %sel.addr = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %lane.i64
  %dst.addr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %lane.i64

  ; The index is loaded before the vector; keep this order, since the check
  ; lines above match the index load first.
  %sel = load i32, i32 addrspace(1)* %sel.addr
  %src = load <2 x i16>, <2 x i16> addrspace(1)* %src.addr

  ; Dynamic-index insert of the constant, then write the vector back out.
  %updated = insertelement <2 x i16> %src, i16 999, i32 %sel
  store <2 x i16> %updated, <2 x i16> addrspace(1)* %dst.addr
  ret void
}


; Intrinsic called by the kernel above; its i32 result is sign-extended there
; and used as the per-workitem element offset for every address computation.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Attribute groups referenced in this file: #0 is attached to the kernel
; definition, #1 to the intrinsic declaration and to its call site.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
[AMDGPU] Disable SReg Global LD/ST, perf regression Differential Revision: https://reviews.llvm.org/D55093 llvm-svn: 348014 2018-12-01 02:29:17 +08:00			`; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals -amdgpu-enable-global-sgpr-addr < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00
			`; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:`

			`; GCN: {{flat\|global}}_load_dword [[IDX:v[0-9]+]]`
			`; GCN: {{flat\|global}}_load_dword [[VEC:v[0-9]+]]`

			`; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}`
[AMDGPU] Fix for vector element insertion Summary: Incorrect code was generated when lowering insertelement operations for vectors with 8 or 16 bit elements. The value being inserted was not adjusted for the position of the element within the 32 bit word and so only the low element within each 32 bit word could receive the intended value. Fixed by simply replicating the value to each element of a congruent vector before the mask and or operation used to update the intended element. A number of affected LIT tests have been updated appropriately. before the mask & or into the intended Reviewers: arsenm, nhaehnle Reviewed By: arsenm Subscribers: llvm-commits, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye Tags: #llvm Differential Revision: https://reviews.llvm.org/D57588 llvm-svn: 352885 2019-02-02 00:51:09 +08:00			`; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0x3e7`
[AMDGPU] Add FixupVectorISel pass, currently Supports SREGs in GLOBAL LD/ST Add a pass to fixup various vector ISel issues. Currently we handle converting GLOBAL_{LOAD\|STORE}_* and GLOBAL_Atomic_* instructions into their _SADDR variants. This involves feeding the sreg into the saddr field of the new instruction. llvm-svn: 347008 2018-11-16 09:13:34 +08:00
			`; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]`
			`; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]`

			`; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]`
			`; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]`

			`; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]`
			`; GCN: {{flat\|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]`
			`define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, i32 addrspace(1)* %idx.ptr) #0 {`
			`%tid = call i32 @llvm.amdgcn.workitem.id.x() #1`
			`%tid.ext = sext i32 %tid to i64`
			`%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext`
			`%idx.gep = getelementptr inbounds i32, i32 addrspace(1)* %idx.ptr, i64 %tid.ext`
			`%out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext`
			`%idx = load i32, i32 addrspace(1)* %idx.gep`
			`%vec = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep`
			`%vecins = insertelement <2 x i16> %vec, i16 999, i32 %idx`
			`store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out.gep`
			`ret void`
			`}`


			`declare i32 @llvm.amdgcn.workitem.id.x() #1`

			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`