llvm-project/llvm/test/CodeGen/AArch64/arm64-sminv.ll

; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

; Signed minimum across the eight i8 lanes of %a1, returned as a sign-extended
; i8. Expected code: sminv.8b into a b register, then smov.b from lane 0 of
; that register into w0, then ret, with nothing in between.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define signext i8 @test_vminv_s8(<8 x i8> %a1) {
; CHECK-LABEL: test_vminv_s8:
; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  ; The intrinsic yields an i32; only the low 8 bits are returned.
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1)
  %0 = trunc i32 %vminv.i to i8
  ret i8 %0
}

; Signed minimum across the four i16 lanes of %a1, returned as a sign-extended
; i16. Expected code: sminv.4h into an h register, then smov.h from lane 0 of
; that register into w0, then ret, with nothing in between.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define signext i16 @test_vminv_s16(<4 x i16> %a1) {
; CHECK-LABEL: test_vminv_s16:
; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  ; The intrinsic yields an i32; only the low 16 bits are returned.
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1)
  %0 = trunc i32 %vminv.i to i16
  ret i16 %0
}

; Signed minimum of the two i32 lanes of %a1, returned as an i32.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define i32 @test_vminv_s32(<2 x i32> %a1) {
; CHECK-LABEL: test_vminv_s32:
; An across-lanes sminv on 2 x i32 is not supported by the ISA, so this is a
; special case: a pairwise sminp of v0 with itself is expected instead.
; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0
; CHECK-NEXT: fmov w0, s[[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1)
  ret i32 %vminv.i
}

; Signed minimum across the sixteen i8 lanes of %a1, returned as a
; sign-extended i8. Expected code: sminv.16b into a b register, then smov.b
; from lane 0 of that register into w0, then ret, with nothing in between.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define signext i8 @test_vminvq_s8(<16 x i8> %a1) {
; CHECK-LABEL: test_vminvq_s8:
; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  ; The intrinsic yields an i32; only the low 8 bits are returned.
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1)
  %0 = trunc i32 %vminv.i to i8
  ret i8 %0
}

; Signed minimum across the eight i16 lanes of %a1, returned as a
; sign-extended i16. Expected code: sminv.8h into an h register, then smov.h
; from lane 0 of that register into w0, then ret, with nothing in between.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define signext i16 @test_vminvq_s16(<8 x i16> %a1) {
; CHECK-LABEL: test_vminvq_s16:
; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  ; The intrinsic yields an i32; only the low 16 bits are returned.
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1)
  %0 = trunc i32 %vminv.i to i16
  ret i16 %0
}

; Signed minimum across the four i32 lanes of %a1, returned as an i32.
; Expected code: sminv.4s into an s register, then fmov of that register into
; w0, then ret, with nothing in between.
; The function name is matched with a label directive (name plus colon), the
; same form the *_used_by_laneop tests in this file use, so matching is
; anchored at the function's label line rather than at any earlier occurrence
; of the name in the output.
define i32 @test_vminvq_s32(<4 x i32> %a1) {
; CHECK-LABEL: test_vminvq_s32:
; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0
; CHECK-NEXT: fmov w0, [[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1)
  ret i32 %vminv.i
}

; The signed across-lanes minimum of %a2 (8 x i8) is written into lane 3 of
; %a1. The expected code inserts lane 0 of the sminv result register straight
; into v0 with ins.b, immediately after the sminv.
define <8 x i8> @test_vminv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
; CHECK-LABEL: test_vminv_s8_used_by_laneop:
; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min.wide = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a2)
  %min = trunc i32 %min.wide to i8
  %merged = insertelement <8 x i8> %a1, i8 %min, i32 3
  ret <8 x i8> %merged
}

; The signed across-lanes minimum of %a2 (4 x i16) is written into lane 3 of
; %a1. The expected code inserts lane 0 of the sminv result register straight
; into v0 with ins.h, immediately after the sminv.
define <4 x i16> @test_vminv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
; CHECK-LABEL: test_vminv_s16_used_by_laneop:
; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min.wide = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a2)
  %min = trunc i32 %min.wide to i16
  %merged = insertelement <4 x i16> %a1, i16 %min, i32 3
  ret <4 x i16> %merged
}

; The signed minimum of the two i32 lanes of %a2 is written into lane 1 of
; %a1. The expected code is a pairwise sminp of v1 with itself, followed
; immediately by an ins.s from lane 0 of the result into v0.
define <2 x i32> @test_vminv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: test_vminv_s32_used_by_laneop:
; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v1, v1
; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a2)
  %merged = insertelement <2 x i32> %a1, i32 %min, i32 1
  ret <2 x i32> %merged
}

; The signed across-lanes minimum of %a2 (16 x i8) is written into lane 3 of
; %a1. The expected code inserts lane 0 of the sminv result register straight
; into v0 with ins.b, immediately after the sminv.
define <16 x i8> @test_vminvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_vminvq_s8_used_by_laneop:
; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min.wide = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a2)
  %min = trunc i32 %min.wide to i8
  %merged = insertelement <16 x i8> %a1, i8 %min, i32 3
  ret <16 x i8> %merged
}

; The signed across-lanes minimum of %a2 (8 x i16) is written into lane 3 of
; %a1. The expected code inserts lane 0 of the sminv result register straight
; into v0 with ins.h, immediately after the sminv.
define <8 x i16> @test_vminvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_vminvq_s16_used_by_laneop:
; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min.wide = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a2)
  %min = trunc i32 %min.wide to i16
  %merged = insertelement <8 x i16> %a1, i16 %min, i32 3
  ret <8 x i16> %merged
}

; The signed across-lanes minimum of %a2 (4 x i32) is written into lane 3 of
; %a1. The expected code inserts lane 0 of the sminv result register straight
; into v0 with ins.s, immediately after the sminv.
define <4 x i32> @test_vminvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_vminvq_s32_used_by_laneop:
; CHECK: sminv.4s s[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %min = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a2)
  %merged = insertelement <4 x i32> %a1, i32 %min, i32 3
  ret <4 x i32> %merged
}

; Declarations of the sminv intrinsic overloads called by the tests above.
; Every overload returns i32, whatever the element type of its operand.

; 64-bit vector operands.
declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>)

; 128-bit vector operands.
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
[AARCH64] Enable AARCH64 lit tests on windows dev machines As discussed on PR27654, this patch fixes the triples of a lot of aarch64 tests and enables lit tests on windows This will hopefully help stop cases where windows developers break the aarch64 target Differential Revision: https://reviews.llvm.org/D22191 llvm-svn: 275973 2016-07-19 21:35:11 +08:00			`; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false \| FileCheck %s`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00
			`define signext i8 @test_vminv_s8(<8 x i8> %a1) {`
			`; CHECK: test_vminv_s8`
			`; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0`
			`; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%0 = trunc i32 %vminv.i to i8`
			`ret i8 %0`
			`}`

			`define signext i16 @test_vminv_s16(<4 x i16> %a1) {`
			`; CHECK: test_vminv_s16`
			`; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0`
			`; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%0 = trunc i32 %vminv.i to i16`
			`ret i16 %0`
			`}`

			`define i32 @test_vminv_s32(<2 x i32> %a1) {`
			`; CHECK: test_vminv_s32`
			`; 2 x i32 is not supported by the ISA, thus, this is a special case`
			`; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0`
			`; CHECK-NEXT: fmov w0, s[[REGNUM]]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`ret i32 %vminv.i`
			`}`

			`define signext i8 @test_vminvq_s8(<16 x i8> %a1) {`
			`; CHECK: test_vminvq_s8`
			`; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0`
			`; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%0 = trunc i32 %vminv.i to i8`
			`ret i8 %0`
			`}`

			`define signext i16 @test_vminvq_s16(<8 x i16> %a1) {`
			`; CHECK: test_vminvq_s16`
			`; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0`
			`; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%0 = trunc i32 %vminv.i to i16`
			`ret i16 %0`
			`}`

			`define i32 @test_vminvq_s32(<4 x i32> %a1) {`
			`; CHECK: test_vminvq_s32`
			`; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0`
			`; CHECK-NEXT: fmov w0, [[REGNUM]]`
			`; CHECK-NEXT: ret`
			`entry:`
AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`%vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`ret i32 %vminv.i`
			`}`

[AArch64] Avoid going through GPRs for across-vector instructions. This adds new node types for each intrinsic. For instance, for addv, we have AArch64ISD::UADDV, such that: (v4i32 (uaddv ...)) is the same as (v4i32 (scalar_to_vector (i32 (int_aarch64_neon_uaddv ...)))) that is, (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (i32 (int_aarch64_neon_uaddv ...)), ssub) In a combine, we transform all such across-vector-lanes intrinsics to: (i32 (extract_vector_elt (uaddv ...), 0)) This has one big advantage: by making the extract_element explicit, we enable the existing patterns for lane-aware instructions to fire. This lets us avoid needlessly going through the GPRs. Consider: uint32x4_t test_mul(uint32x4_t a, uint32x4_t b) { return vmulq_n_u32(a, vaddvq_u32(b)); } We now generate: addv.4s s1, v1 mul.4s v0, v0, v1[0] instead of the previous: addv.4s s1, v1 fmov w8, s1 dup.4s v1, w8 mul.4s v0, v1, v0 rdar://20044838 llvm-svn: 231840 2015-03-11 04:45:38 +08:00			`define <8 x i8> @test_vminv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {`
			`; CHECK-LABEL: test_vminv_s8_used_by_laneop:`
			`; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v1`
			`; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a2)`
			`%1 = trunc i32 %0 to i8`
			`%2 = insertelement <8 x i8> %a1, i8 %1, i32 3`
			`ret <8 x i8> %2`
			`}`

			`define <4 x i16> @test_vminv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {`
			`; CHECK-LABEL: test_vminv_s16_used_by_laneop:`
			`; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v1`
			`; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a2)`
			`%1 = trunc i32 %0 to i16`
			`%2 = insertelement <4 x i16> %a1, i16 %1, i32 3`
			`ret <4 x i16> %2`
			`}`

			`define <2 x i32> @test_vminv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {`
			`; CHECK-LABEL: test_vminv_s32_used_by_laneop:`
			`; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v1, v1`
			`; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a2)`
			`%1 = insertelement <2 x i32> %a1, i32 %0, i32 1`
			`ret <2 x i32> %1`
			`}`

			`define <16 x i8> @test_vminvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {`
			`; CHECK-LABEL: test_vminvq_s8_used_by_laneop:`
			`; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v1`
			`; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a2)`
			`%1 = trunc i32 %0 to i8`
			`%2 = insertelement <16 x i8> %a1, i8 %1, i32 3`
			`ret <16 x i8> %2`
			`}`

			`define <8 x i16> @test_vminvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {`
			`; CHECK-LABEL: test_vminvq_s16_used_by_laneop:`
			`; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v1`
			`; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a2)`
			`%1 = trunc i32 %0 to i16`
			`%2 = insertelement <8 x i16> %a1, i16 %1, i32 3`
			`ret <8 x i16> %2`
			`}`

			`define <4 x i32> @test_vminvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {`
			`; CHECK-LABEL: test_vminvq_s32_used_by_laneop:`
			`; CHECK: sminv.4s s[[REGNUM:[0-9]+]], v1`
			`; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]`
			`; CHECK-NEXT: ret`
			`entry:`
			`%0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a2)`
			`%1 = insertelement <4 x i32> %a1, i32 %0, i32 3`
			`ret <4 x i32> %1`
			`}`

AArch64/ARM64: move ARM64 into AArch64's place This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency. "ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though. This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now. llvm-svn: 209577 2014-05-24 20:50:23 +08:00			`declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)`
			`declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)`
			`declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)`
			`declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>)`
			`declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)`
			`declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00