; llvm-project/llvm/test/CodeGen/AMDGPU/vector-alloca.ll

; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
; The "A5" specification makes address space 5 the alloca address space, which
; is why every alloca in this file carries addrspace(5).
target datalayout = "A5"

; OPT-LABEL: @vector_read(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_read:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
; Kernel input for the read case: an alloca'd [4 x i32] in addrspace(5) is
; filled with the constants 0, 1, 2, 3 through constant-index GEPs, then the
; element selected by %index is loaded and written to %out.
define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
  %array = alloca [4 x i32], addrspace(5)
  ; Pointers to the four constant-indexed elements.
  %elt0 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 0
  %elt1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 1
  %elt2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 2
  %elt3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 3
  ; Initialise the array to <0, 1, 2, 3>.
  store i32 0, i32 addrspace(5)* %elt0
  store i32 1, i32 addrspace(5)* %elt1
  store i32 2, i32 addrspace(5)* %elt2
  store i32 3, i32 addrspace(5)* %elt3
  ; Dynamically indexed read; its result is the only value that reaches %out.
  %elt.dyn = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 %index
  %val = load i32, i32 addrspace(5)* %elt.dyn
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @vector_write(
; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index
; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index
; OPT: store i32 %1, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_write:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
; EG: MOVA_INT
; Kernel input for the write case: an alloca'd [4 x i32] in addrspace(5) is
; zero-filled, the element selected by %w_index is overwritten with 1, and the
; element selected by %r_index is then loaded and written to %out.
define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %array = alloca [4 x i32], addrspace(5)
  ; Pointers to the four constant-indexed elements.
  %elt0 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 0
  %elt1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 1
  %elt2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 2
  %elt3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 3
  ; Zero every element.
  store i32 0, i32 addrspace(5)* %elt0
  store i32 0, i32 addrspace(5)* %elt1
  store i32 0, i32 addrspace(5)* %elt2
  store i32 0, i32 addrspace(5)* %elt3
  ; Dynamically indexed write of the constant 1.
  %elt.wr = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 %w_index
  store i32 1, i32 addrspace(5)* %elt.wr
  ; Dynamically indexed read; its result is the only value that reaches %out.
  %elt.rd = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 %r_index
  %val = load i32, i32 addrspace(5)* %elt.rd
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; This test should be optimized to:
; store i32 0, i32 addrspace(1)* %out

; OPT-LABEL: @bitcast_gep(
; OPT: store i32 0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}bitcast_gep:
; EG: STORE_RAW
; The store expectation for the optimized IR is a plain check rather than a
; label check: label checks are meant for lines that begin a new function in
; the output, not for instructions inside one.
;
; Kernel input: an alloca'd [4 x i32] in addrspace(5) is zero-filled, then
; element 1 is reached through a GEP -> bitcast -> GEP chain and loaded. Every
; element holds 0, so the value written to %out is 0. The %w_index and
; %r_index arguments are not referenced by the body.
define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  ; Zero every element.
  store i32 0, i32 addrspace(5)* %x
  store i32 0, i32 addrspace(5)* %y
  store i32 0, i32 addrspace(5)* %z
  store i32 0, i32 addrspace(5)* %w
  ; Address of element 1, reinterpreted as a pointer to a [4 x i32] and then
  ; indexed at [0][0], which yields the same address again.
  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
  %tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
  %tmp4 = load i32, i32 addrspace(5)* %tmp3
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @vector_read_bitcast_gep(
; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
; Kernel input for the bitcast-of-GEP case: element 0 of an alloca'd
; [4 x i32] in addrspace(5) is written as a float through a bitcast pointer,
; elements 1..3 are written as i32, and the element selected by %index is then
; loaded as i32 and written to %out. float 1.0 has the bit pattern 0x3F800000,
; i.e. 1065353216 when read back as i32.
define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
entry:
  %array = alloca [4 x i32], addrspace(5)
  ; Pointers to the four constant-indexed elements.
  %elt0 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 0
  %elt1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 1
  %elt2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 2
  %elt3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 3
  ; Element 0 is stored through a float-typed view of its address.
  %elt0.f32 = bitcast i32 addrspace(5)* %elt0 to float addrspace(5)*
  store float 1.0, float addrspace(5)* %elt0.f32
  store i32 1, i32 addrspace(5)* %elt1
  store i32 2, i32 addrspace(5)* %elt2
  store i32 3, i32 addrspace(5)* %elt3
  ; Dynamically indexed i32 read; its result is the only value that reaches %out.
  %elt.dyn = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 %index
  %val = load i32, i32 addrspace(5)* %elt.dyn
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; FIXME: Should be able to promote this. Instcombine should fold the
; cast in the hasOneUse case so it might not matter in practice

; OPT-LABEL: @vector_read_bitcast_alloca(
; OPT: alloca [4 x float]
; OPT: store float
; OPT: store float
; OPT: store float
; OPT: store float
; OPT: load float
; Kernel input for the bitcast-of-alloca case: the [4 x i32] alloca in
; addrspace(5) is bitcast as a whole to a [4 x float] pointer, and every
; access (four constant-index float stores, one %index-selected float load)
; goes through that bitcast. The loaded float is written to %out.
define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
entry:
  %array = alloca [4 x i32], addrspace(5)
  ; Float-typed view of the whole array; all GEPs below are based on it.
  %array.f32 = bitcast [4 x i32] addrspace(5)* %array to [4 x float] addrspace(5)*
  %elt0 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %array.f32, i32 0, i32 0
  %elt1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %array.f32, i32 0, i32 1
  %elt2 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %array.f32, i32 0, i32 2
  %elt3 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %array.f32, i32 0, i32 3
  ; Initialise the array to <0.0, 1.0, 2.0, 4.0>.
  store float 0.0, float addrspace(5)* %elt0
  store float 1.0, float addrspace(5)* %elt1
  store float 2.0, float addrspace(5)* %elt2
  store float 4.0, float addrspace(5)* %elt3
  ; Dynamically indexed float read; its result is the only value that reaches %out.
  %elt.dyn = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %array.f32, i32 0, i32 %index
  %val = load float, float addrspace(5)* %elt.dyn
  store float %val, float addrspace(1)* %out
  ret void
}

; The pointer arguments in local address space should not affect promotion to vector.

; OPT-LABEL: @vector_read_with_local_arg(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
; Kernel input for the local-pointer-argument case: the body fills an alloca'd
; [4 x i32] in addrspace(5) with 0, 1, 2, 3 and reads back the element
; selected by %index. The additional addrspace(3) pointer argument %stopper is
; never referenced by the body.
define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
entry:
  %array = alloca [4 x i32], addrspace(5)
  ; Pointers to the four constant-indexed elements.
  %elt0 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 0
  %elt1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 1
  %elt2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 2
  %elt3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 3
  ; Initialise the array to <0, 1, 2, 3>.
  store i32 0, i32 addrspace(5)* %elt0
  store i32 1, i32 addrspace(5)* %elt1
  store i32 2, i32 addrspace(5)* %elt2
  store i32 3, i32 addrspace(5)* %elt3
  ; Dynamically indexed read; its result is the only value that reaches %out.
  %elt.dyn = getelementptr [4 x i32], [4 x i32] addrspace(5)* %array, i32 0, i32 %index
  %val = load i32, i32 addrspace(5)* %elt.dyn
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s \| FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s`
			`; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s \| FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s`
			`; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s \| FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s`
			`; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s \| FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s`
			`; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s \| FileCheck --check-prefix=EG -check-prefix=FUNC %s`
			`; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s \| FileCheck -check-prefix=OPT %s`
			`target datalayout = "A5"`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00
			`; OPT-LABEL: @vector_read(`
			`; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index`
			`; OPT: store i32 %0, i32 addrspace(1)* %out, align 4`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00
R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}vector_read:`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOVA_INT`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0`
			`%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2`
			`%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3`
			`store i32 0, i32 addrspace(5)* %x`
			`store i32 1, i32 addrspace(5)* %y`
			`store i32 2, i32 addrspace(5)* %z`
			`store i32 3, i32 addrspace(5)* %w`
			`%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index`
			`%tmp2 = load i32, i32 addrspace(5)* %tmp1`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`store i32 %tmp2, i32 addrspace(1)* %out`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`ret void`
			`}`

AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`; OPT-LABEL: @vector_write(`
			`; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index`
			`; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index`
			`; OPT: store i32 %1, i32 addrspace(1)* %out, align 4`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}vector_write:`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOV`
			`; EG: MOVA_INT`
			`; EG: MOVA_INT`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0`
			`%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2`
			`%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3`
			`store i32 0, i32 addrspace(5)* %x`
			`store i32 0, i32 addrspace(5)* %y`
			`store i32 0, i32 addrspace(5)* %z`
			`store i32 0, i32 addrspace(5)* %w`
			`%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index`
			`store i32 1, i32 addrspace(5)* %tmp1`
			`%tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index`
			`%tmp3 = load i32, i32 addrspace(5)* %tmp2`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`store i32 %tmp3, i32 addrspace(1)* %out`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`ret void`
			`}`

			`; This test should be optimize to:`
			`; store i32 0, i32 addrspace(1)* %out`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00
			`; OPT-LABEL: @bitcast_gep(`
			`; OPT-LABEL: store i32 0, i32 addrspace(1)* %out, align 4`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}bitcast_gep:`
R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895 2014-07-13 10:46:17 +08:00			`; EG: STORE_RAW`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0`
			`%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2`
			`%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3`
			`store i32 0, i32 addrspace(5)* %x`
			`store i32 0, i32 addrspace(5)* %y`
			`store i32 0, i32 addrspace(5)* %z`
			`store i32 0, i32 addrspace(5)* %w`
			`%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*`
			`%tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0`
			`%tmp4 = load i32, i32 addrspace(5)* %tmp3`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`store i32 %tmp4, i32 addrspace(1)* %out`
			`ret void`
			`}`

			`; OPT-LABEL: @vector_read_bitcast_gep(`
			`; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index`
			`; OPT: store i32 %0, i32 addrspace(1)* %out, align 4`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0`
			`%y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2`
			`%w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3`
			`%bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*`
			`store float 1.0, float addrspace(5)* %bc`
			`store i32 1, i32 addrspace(5)* %y`
			`store i32 2, i32 addrspace(5)* %z`
			`store i32 3, i32 addrspace(5)* %w`
			`%tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index`
			`%tmp2 = load i32, i32 addrspace(5)* %tmp1`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`store i32 %tmp2, i32 addrspace(1)* %out`
			`ret void`
			`}`

			`; FIXME: Should be able to promote this. Instcombine should fold the`
			`; cast in the hasOneUse case so it might not matter in practice`

			`; OPT-LABEL: @vector_read_bitcast_alloca(`
			`; OPT: alloca [4 x float]`
			`; OPT: store float`
			`; OPT: store float`
			`; OPT: store float`
			`; OPT: store float`
			`; OPT: load float`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*`
			`%x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0`
			`%y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1`
			`%z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2`
			`%w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3`
			`store float 0.0, float addrspace(5)* %x`
			`store float 1.0, float addrspace(5)* %y`
			`store float 2.0, float addrspace(5)* %z`
			`store float 4.0, float addrspace(5)* %w`
			`%tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index`
			`%tmp2 = load float, float addrspace(5)* %tmp1`
AMDGPU: Remove dead check in AMDGPUPromoteAlloca This is currently only called with GEP users. A direct alloca would only happen with current typed pointers for arrays which are a perverse case. Also fix crashes on 0 x and 1 x arrays. llvm-svn: 275869 2016-07-19 02:34:53 +08:00			`store float %tmp2, float addrspace(1)* %out`
R600: Use LDS and vectors for private memory llvm-svn: 211110 2014-06-18 00:53:14 +08:00			`ret void`
			`}`
AMDGPU/SI: Move the local memory usage related checking after calling convention checking in PromoteAlloca Summary: Promoting Alloca to Vector and Promoting Alloca to LDS are two independent handling of Alloca and should not affect each other. As a result, we should not give up promoting to vector if there is not enough LDS. This patch factors out the local memory usage related checking out and replace it after the calling convention checking. Reviewer: arsenm Differential Revision: http://reviews.llvm.org/D33139 llvm-svn: 303684 2017-05-24 04:25:41 +08:00
			`; The pointer arguments in local address space should not affect promotion to vector.`

			`; OPT-LABEL: @vector_read_with_local_arg(`
			`; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index`
			`; OPT: store i32 %0, i32 addrspace(1)* %out, align 4`
			`define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {`
			`entry:`
[AMDGPU] Fix pointer info for pseudo source for r600 The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861 2017-11-10 09:53:24 +08:00			`%tmp = alloca [4 x i32], addrspace(5)`
			`%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0`
			`%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1`
			`%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2`
			`%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3`
			`store i32 0, i32 addrspace(5)* %x`
			`store i32 1, i32 addrspace(5)* %y`
			`store i32 2, i32 addrspace(5)* %z`
			`store i32 3, i32 addrspace(5)* %w`
			`%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index`
			`%tmp2 = load i32, i32 addrspace(5)* %tmp1`
AMDGPU/SI: Move the local memory usage related checking after calling convention checking in PromoteAlloca Summary: Promoting Alloca to Vector and Promoting Alloca to LDS are two independent handling of Alloca and should not affect each other. As a result, we should not give up promoting to vector if there is not enough LDS. This patch factors out the local memory usage related checking out and replace it after the calling convention checking. Reviewer: arsenm Differential Revision: http://reviews.llvm.org/D33139 llvm-svn: 303684 2017-05-24 04:25:41 +08:00			`store i32 %tmp2, i32 addrspace(1)* %out`
			`ret void`
			`}`