llvm-project/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll

; RUN: llc -march=amdgcn -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; XXX - Why is the value packed with a shift/shift/or sequence here?
; GCN-LABEL: {{^}}scalar_to_vector_v2i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 16, [[VAL]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[SHR]]
; GCN: v_or_b32_e32 v[[OR:[0-9]+]], [[SHL]], [[SHR]]
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[OR]]
; GCN: buffer_store_dwordx2 v{{\[}}[[OR]]:[[COPY]]{{\]}}
define amdgpu_kernel void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  ; Load a single 32-bit integer from %in and view it as two 16-bit lanes.
  %word = load i32, i32 addrspace(1)* %in, align 4
  %halves = bitcast i32 %word to <2 x i16>
  ; Replicate lane 1 of the pair into every lane of the 4-element result.
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; Write the 64-bit result vector to %out.
  store <4 x i16> %splat, <4 x i16> addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}scalar_to_vector_v2f32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  ; Same shape as the i32 variant, but the 32-bit source value is a float.
  %fval = load float, float addrspace(1)* %in, align 4
  %halves = bitcast float %fval to <2 x i16>
  ; Replicate lane 1 of the pair into every lane of the 4-element result.
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; Write the 64-bit result vector to %out.
  store <4 x i16> %splat, <4 x i16> addrspace(1)* %out, align 8
  ret void
}

; Getting a SCALAR_TO_VECTOR node seems to be tricky. The commented-out
; cases below managed to produce one, but for some reason it never made
; it to instruction selection.


; define amdgpu_kernel void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
;   %tmp1 = load i32, i32 addrspace(1)* %in, align 4
;   %bc = bitcast i32 %tmp1 to <4 x i8>

;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
;   store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
;   ret void
; }

; define amdgpu_kernel void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
;   %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
;   %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
;   %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
;   %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
;   store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
;   ret void
; }

; define amdgpu_kernel void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
;   %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
;   %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
;   %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
;   store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
;   ret void
; }

; define amdgpu_kernel void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
;   %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
;   %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
;   %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
;   store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
;   ret void
; }

; Inserts an i8 argument into lane 0 of an otherwise-undef <4 x i8>,
; reinterprets the 32 bits as <2 x half>, and stores the result.
; No instruction-level checks are made for this kernel; the label check
; below only confirms that the kernel is emitted and keeps checks for the
; preceding kernels from matching instructions in this one.
; GCN-LABEL: {{^}}scalar_to_vector_test6:
define amdgpu_kernel void @scalar_to_vector_test6(<2 x half> addrspace(1)* %out, i8 zeroext %val) nounwind {
  ; Only lane 0 is defined; lanes 1-3 stay undef.
  %newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0
  ; Reinterpret the four bytes as two half-precision lanes.
  %bc = bitcast <4 x i8> %newvec0 to <2 x half>
  store <2 x half> %bc, <2 x half> addrspace(1)* %out
  ret void
}
AMDGPU: Cleanup scalar_to_vector test llvm-svn: 294038 2017-02-04 04:49:48 +08:00			`; RUN: llc -march=amdgcn -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00
AMDGPU: Improve load/store of illegal types. There was a combine before to handle the simple copy case. Split this into handling loads and stores separately. We might want to change how this handles some of the vector extloads, since this can result in large code size increases. llvm-svn: 274394 2016-07-02 06:47:50 +08:00			`; XXX - Why the packing?`
AMDGPU: Cleanup scalar_to_vector test llvm-svn: 294038 2017-02-04 04:49:48 +08:00			`; GCN-LABEL: {{^}}scalar_to_vector_v2i32:`
			`; GCN: buffer_load_dword [[VAL:v[0-9]+]],`
			`; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 16, [[VAL]]`
			`; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[SHR]]`
			`; GCN: v_or_b32_e32 v[[OR:[0-9]+]], [[SHL]], [[SHR]]`
			`; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[OR]]`
			`; GCN: buffer_store_dwordx2 v{{\[}}[[OR]]:[[COPY]]{{\]}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%tmp1 = load i32, i32 addrspace(1)* %in, align 4`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`%bc = bitcast i32 %tmp1 to <2 x i16>`
			`%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>`
			`store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU: Cleanup scalar_to_vector test llvm-svn: 294038 2017-02-04 04:49:48 +08:00			`; GCN-LABEL: {{^}}scalar_to_vector_v2f32:`
			`; GCN: buffer_load_dword [[VAL:v[0-9]+]],`
			`; GCN: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]`
			`; GCN: buffer_store_dwordx2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%tmp1 = load float, float addrspace(1)* %in, align 4`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`%bc = bitcast float %tmp1 to <2 x i16>`
			`%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>`
			`store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed`
			`; to produce one, but for some reason never made it to selection.`


AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`; define amdgpu_kernel void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`; %tmp1 = load i32, i32 addrspace(1)* %in, align 4`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`; %bc = bitcast i32 %tmp1 to <4 x i8>`

			`; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>`
			`; store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4`
			`; ret void`
			`; }`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`; define amdgpu_kernel void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0`
			`; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1`
			`; %bc = bitcast <2 x i64> %newvec1 to <4 x i32>`
			`; %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>`
			`; store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16`
			`; ret void`
			`; }`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`; define amdgpu_kernel void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0`
			`; %bc = bitcast <4 x i32> %newvec0 to <8 x i16>`
			`; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>`
			`; store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16`
			`; ret void`
			`; }`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`; define amdgpu_kernel void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {`
R600/SI: Fix selection failure on scalar_to_vector There seem to be only 2 places that produce these, and it's kind of tricky to hit them. Also fixes failure to bitcast between i64 and v2f32, although this for some reason wasn't actually broken in the simple bitcast testcase, but did in the scalar_to_vector one. llvm-svn: 210664 2014-06-12 01:40:32 +08:00			`; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0`
			`; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>`
			`; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>`
			`; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16`
			`; ret void`
			`; }`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @scalar_to_vector_test6(<2 x half> addrspace(1)* %out, i8 zeroext %val) nounwind {`
AMDGPU: Support v2i16/v2f16 packed operations llvm-svn: 296396 2017-02-28 06:15:25 +08:00			`%newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0`
			`%bc = bitcast <4 x i8> %newvec0 to <2 x half>`
			`store <2 x half> %bc, <2 x half> addrspace(1)* %out`
			`ret void`
			`}`