llvm-project/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

; A vector TruncStore cannot be selected.
; Test that a trunc IR instruction followed by a vector store IR instruction
; is selected correctly.
define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) {
; Narrow <2 x i64> to <2 x i32> and store the result through %result.
; Expected assembly: an xtn from a .2d source into a .2s destination, then
; either an st1 of a one-register .2s list or a str of a d register.
; CHECK-LABEL: truncStore.v2i64:
; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
; CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
  %narrow = trunc <2 x i64> %a to <2 x i32>
  store <2 x i32> %narrow, <2 x i32>* %result
  ret void
}

define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) {
; Narrow <4 x i32> to <4 x i16> and store the result through %result.
; Expected assembly: an xtn from a .4s source into a .4h destination, then
; either an st1 of a one-register .4h list or a str of a d register.
; CHECK-LABEL: truncStore.v4i32:
; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
; CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
  %narrow = trunc <4 x i32> %a to <4 x i16>
  store <4 x i16> %narrow, <4 x i16>* %result
  ret void
}

define void @truncStore.v4i8(<4 x i32> %a, <4 x i8>* %result) {
; Narrow <4 x i32> all the way down to <4 x i8> and store it through %result.
; Expected assembly, on three consecutive lines: an xtn from .4s to .4h, a
; second xtn that reads the first one's destination register as .8h and
; produces .8b, and finally a str of an s register.
; CHECK-LABEL: truncStore.v4i8:
; CHECK:      xtn [[TMP:(v[0-9]+)]].4h, v{{[0-9]+}}.4s
; CHECK-NEXT: xtn [[TMP2:(v[0-9]+)]].8b, [[TMP]].8h
; CHECK-NEXT: str s{{[0-9]+}}, [x{{[0-9]+}}]
  %narrow = trunc <4 x i32> %a to <4 x i8>
  store <4 x i8> %narrow, <4 x i8>* %result
  ret void
}

define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {
; Narrow <8 x i16> to <8 x i8> and store the result through %result.
; Expected assembly: an xtn from a .8h source into a .8b destination, then
; either an st1 of a one-register .8b list or a str of a d register.
; CHECK-LABEL: truncStore.v8i16:
; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
; CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
  %narrow = trunc <8 x i16> %a to <8 x i8>
  store <8 x i8> %narrow, <8 x i8>* %result
  ret void
}

; A vector LoadExt cannot be selected.
; Test that a vector load IR instruction followed by a sext/zext IR
; instruction is selected correctly.
define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {
; Load a <4 x i8> vector from %ref and sign-extend every lane to i32.
; The only requirement on the output is that an ldrsb appears in this function.
; CHECK-LABEL: loadSExt.v4i8:
; CHECK: ldrsb
  %bytes = load <4 x i8>, <4 x i8>* %ref
  %widened = sext <4 x i8> %bytes to <4 x i32>
  ret <4 x i32> %widened
}

define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {
; Load a <4 x i8> vector from %ref and zero-extend every lane to i32.
; The only requirement on the output is that an ldrb appears in this function.
; CHECK-LABEL: loadZExt.v4i8:
; CHECK: ldrb
  %bytes = load <4 x i8>, <4 x i8>* %ref
  %widened = zext <4 x i8> %bytes to <4 x i32>
  ret <4 x i32> %widened
}

define i32 @loadExt.i32(<4 x i8>* %ref) {
; Load a <4 x i8> vector from %ref, take element 0, and zero-extend that
; single byte to a scalar i32.
; The only requirement on the output is that an ldrb appears in this function.
; CHECK-LABEL: loadExt.i32:
; CHECK: ldrb
  %bytes = load <4 x i8>, <4 x i8>* %ref
  %lane0 = extractelement <4 x i8> %bytes, i32 0
  %widened = zext i8 %lane0 to i32
  ret i32 %widened
}
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00
			`; A vector TruncStore can not be selected.`
			`; Test a trunc IR and a vector store IR can be selected correctly.`
			`define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) {`
			`; CHECK-LABEL: truncStore.v2i64:`
			`; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d`
AArch64: print NEON lists with a space. This matches ARM64 behaviour, which I think is clearer. It also puts all the churn from that difference into one easily ignored commit. llvm-svn: 207116 2014-04-24 22:06:20 +08:00			`; CHECK: {{st1 { v[0-9]+.2s }\|str d[0-9]+}}, [x{{[0-9]+\|sp}}]`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%b = trunc <2 x i64> %a to <2 x i32>`
			`store <2 x i32> %b, <2 x i32>* %result`
			`ret void`
			`}`

			`define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) {`
			`; CHECK-LABEL: truncStore.v4i32:`
			`; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s`
AArch64: print NEON lists with a space. This matches ARM64 behaviour, which I think is clearer. It also puts all the churn from that difference into one easily ignored commit. llvm-svn: 207116 2014-04-24 22:06:20 +08:00			`; CHECK: {{st1 { v[0-9]+.4h }\|str d[0-9]+}}, [x{{[0-9]+\|sp}}]`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%b = trunc <4 x i32> %a to <4 x i16>`
			`store <4 x i16> %b, <4 x i16>* %result`
			`ret void`
[AArch64] Add custom lowering for v4i8 trunc store This patch adds a custom trunc store lowering for v4i8 vector types. Since there is no v.4b register, the v4i8 is promoted to v4i16 (v.4h) and the default action for v4i8 is to extract each element and issue 4 byte stores. A better strategy would be to extend the promoted v4i16 to v8i16 (with undef elements) and extract and store the word lane which represents the v4i8 subvectors. The construction: define void @foo(<4 x i16> %x, i8* nocapture %p) { %0 = trunc <4 x i16> %x to <4 x i8> %1 = bitcast i8* %p to <4 x i8>* store <4 x i8> %0, <4 x i8>* %1, align 4, !tbaa !2 ret void } Can be optimized from: umov w8, v0.h[3] umov w9, v0.h[2] umov w10, v0.h[1] umov w11, v0.h[0] strb w8, [x0, #3] strb w9, [x0, #2] strb w10, [x0, #1] strb w11, [x0] ret To: xtn v0.8b, v0.8h str s0, [x0] ret The patch also adjusts the memory cost for autovectorization, so the C code: void foo (const int *src, int width, unsigned char *dst) { for (int i = 0; i < width; i++) *dst++ = *src++; } can be vectorized to: .LBB0_4: // %vector.body // =>This Inner Loop Header: Depth=1 ldr q0, [x0], #16 subs x12, x12, #4 // =4 xtn v0.4h, v0.4s xtn v0.8b, v0.8h st1 { v0.s }[0], [x2], #4 b.ne .LBB0_4 Instead of byte operations. llvm-svn: 335735 2018-06-27 21:58:46 +08:00			`}`

			`define void @truncStore.v4i8(<4 x i32> %a, <4 x i8>* %result) {`
			`; CHECK-LABEL: truncStore.v4i8:`
			`; CHECK: xtn [[TMP:(v[0-9]+)]].4h, v{{[0-9]+}}.4s`
			`; CHECK-NEXT: xtn [[TMP2:(v[0-9]+)]].8b, [[TMP]].8h`
			`; CHECK-NEXT: str s{{[0-9]+}}, [x{{[0-9]+}}]`
			`%b = trunc <4 x i32> %a to <4 x i8>`
			`store <4 x i8> %b, <4 x i8>* %result`
			`ret void`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`}`

			`define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {`
			`; CHECK-LABEL: truncStore.v8i16:`
			`; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h`
AArch64: print NEON lists with a space. This matches ARM64 behaviour, which I think is clearer. It also puts all the churn from that difference into one easily ignored commit. llvm-svn: 207116 2014-04-24 22:06:20 +08:00			`; CHECK: {{st1 { v[0-9]+.8b }\|str d[0-9]+}}, [x{{[0-9]+\|sp}}]`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%b = trunc <8 x i16> %a to <8 x i8>`
			`store <8 x i8> %b, <8 x i8>* %result`
			`ret void`
			`}`

			`; A vector LoadExt can not be selected.`
			`; Test a vector load IR and a sext/zext IR can be selected correctly.`
			`define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {`
			`; CHECK-LABEL: loadSExt.v4i8:`
			`; CHECK: ldrsb`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load <4 x i8>, <4 x i8>* %ref`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%conv = sext <4 x i8> %a to <4 x i32>`
			`ret <4 x i32> %conv`
			`}`

			`define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {`
			`; CHECK-LABEL: loadZExt.v4i8:`
			`; CHECK: ldrb`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load <4 x i8>, <4 x i8>* %ref`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%conv = zext <4 x i8> %a to <4 x i32>`
			`ret <4 x i32> %conv`
			`}`

			`define i32 @loadExt.i32(<4 x i8>* %ref) {`
			`; CHECK-LABEL: loadExt.i32:`
			`; CHECK: ldrb`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load <4 x i8>, <4 x i8>* %ref`
[AArch64]Pattern match failures for truncate store and extend load llvm-svn: 196748 2013-12-09 11:34:08 +08:00			`%vecext = extractelement <4 x i8> %a, i32 0`
			`%conv = zext i8 %vecext to i32`
			`ret i32 %conv`
AArch64: add newline to end of test files. Should be no other change. llvm-svn: 206174 2014-04-14 21:18:40 +08:00			`}`