Convert more NEON tests to use FileCheck.

llvm-svn: 83587
This commit is contained in:
Bob Wilson 2009-10-08 22:33:53 +00:00
parent e07e33a196
commit f448255063
13 changed files with 295 additions and 100 deletions

View File

@ -1,39 +1,48 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqabs\\.s8} %t | count 2
; RUN: grep {vqabs\\.s16} %t | count 2
; RUN: grep {vqabs\\.s32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Loads an <8 x i8> vector through %A, hands it to the
; llvm.arm.neon.vqabs.v8i8 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqabs.s8.
define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
;CHECK: vqabss8:
;CHECK: vqabs.s8
  %vec = load <8 x i8>* %A
  %res = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %vec)
  ret <8 x i8> %res
}
; Loads a <4 x i16> vector through %A, hands it to the
; llvm.arm.neon.vqabs.v4i16 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqabs.s16.
define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
;CHECK: vqabss16:
;CHECK: vqabs.s16
  %vec = load <4 x i16>* %A
  %res = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %vec)
  ret <4 x i16> %res
}
; Loads a <2 x i32> vector through %A, hands it to the
; llvm.arm.neon.vqabs.v2i32 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqabs.s32.
define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
;CHECK: vqabss32:
;CHECK: vqabs.s32
  %vec = load <2 x i32>* %A
  %res = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %vec)
  ret <2 x i32> %res
}
; 128-bit (<16 x i8>) form: loads the vector through %A, hands it to the
; llvm.arm.neon.vqabs.v16i8 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqabs.s8.
define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
;CHECK: vqabsQs8:
;CHECK: vqabs.s8
  %vec = load <16 x i8>* %A
  %res = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %vec)
  ret <16 x i8> %res
}
; 128-bit (<8 x i16>) form: loads the vector through %A, hands it to the
; llvm.arm.neon.vqabs.v8i16 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqabs.s16.
define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
;CHECK: vqabsQs16:
;CHECK: vqabs.s16
  %vec = load <8 x i16>* %A
  %res = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %vec)
  ret <8 x i16> %res
}
define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
;CHECK: vqabsQs32:
;CHECK: vqabs.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2

View File

@ -1,14 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqadd\\.s8} %t | count 2
; RUN: grep {vqadd\\.s16} %t | count 2
; RUN: grep {vqadd\\.s32} %t | count 2
; RUN: grep {vqadd\\.s64} %t | count 2
; RUN: grep {vqadd\\.u8} %t | count 2
; RUN: grep {vqadd\\.u16} %t | count 2
; RUN: grep {vqadd\\.u32} %t | count 2
; RUN: grep {vqadd\\.u64} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqadds8:
;CHECK: vqadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -16,6 +10,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqadds16:
;CHECK: vqadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -23,6 +19,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqadds32:
;CHECK: vqadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -30,6 +28,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqadds64:
;CHECK: vqadd.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -37,6 +37,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqaddu8:
;CHECK: vqadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -44,6 +46,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqaddu16:
;CHECK: vqadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -51,6 +55,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqaddu32:
;CHECK: vqadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -58,6 +64,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqaddu64:
;CHECK: vqadd.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -65,6 +73,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqaddQs8:
;CHECK: vqadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -72,6 +82,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqaddQs16:
;CHECK: vqadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -79,6 +91,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqaddQs32:
;CHECK: vqadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -86,6 +100,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqaddQs64:
;CHECK: vqadd.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@ -93,6 +109,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqaddQu8:
;CHECK: vqadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -100,6 +118,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqaddQu16:
;CHECK: vqadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -107,6 +127,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqaddQu32:
;CHECK: vqadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -114,6 +136,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqaddQu64:
;CHECK: vqadd.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqdmlal\\.s16} %t | count 1
; RUN: grep {vqdmlal\\.s32} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vqdmlals16:
;CHECK: vqdmlal.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@ -11,6 +11,8 @@ define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwi
}
define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vqdmlals32:
;CHECK: vqdmlal.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqdmlsl\\.s16} %t | count 1
; RUN: grep {vqdmlsl\\.s32} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vqdmlsls16:
;CHECK: vqdmlsl.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
@ -11,6 +11,8 @@ define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwi
}
define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vqdmlsls32:
;CHECK: vqdmlsl.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C

View File

@ -1,10 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqdmulh\\.s16} %t | count 2
; RUN: grep {vqdmulh\\.s32} %t | count 2
; RUN: grep {vqrdmulh\\.s16} %t | count 2
; RUN: grep {vqrdmulh\\.s32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqdmulhs16:
;CHECK: vqdmulh.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -12,6 +10,8 @@ define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqdmulhs32:
;CHECK: vqdmulh.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -19,6 +19,8 @@ define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqdmulhQs16:
;CHECK: vqdmulh.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -26,6 +28,8 @@ define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqdmulhQs32:
;CHECK: vqdmulh.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -39,6 +43,8 @@ declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind re
declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrdmulhs16:
;CHECK: vqrdmulh.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -46,6 +52,8 @@ define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrdmulhs32:
;CHECK: vqrdmulh.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -53,6 +61,8 @@ define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrdmulhQs16:
;CHECK: vqrdmulh.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -60,6 +70,8 @@ define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrdmulhQs32:
;CHECK: vqrdmulh.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqdmull\\.s16} %t | count 1
; RUN: grep {vqdmull\\.s32} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqdmulls16:
;CHECK: vqdmull.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -10,6 +10,8 @@ define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqdmulls32:
;CHECK: vqdmull.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)

View File

@ -1,63 +1,72 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqmovn\\.s16} %t | count 1
; RUN: grep {vqmovn\\.s32} %t | count 1
; RUN: grep {vqmovn\\.s64} %t | count 1
; RUN: grep {vqmovn\\.u16} %t | count 1
; RUN: grep {vqmovn\\.u32} %t | count 1
; RUN: grep {vqmovn\\.u64} %t | count 1
; RUN: grep {vqmovun\\.s16} %t | count 1
; RUN: grep {vqmovun\\.s32} %t | count 1
; RUN: grep {vqmovun\\.s64} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Loads an <8 x i16> vector through %A and passes it to the
; llvm.arm.neon.vqmovns.v8i8 intrinsic, returning the <8 x i8> result.
; The directives below expect the function label followed by vqmovn.s16.
define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
;CHECK: vqmovns16:
;CHECK: vqmovn.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %wide)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and passes it to the
; llvm.arm.neon.vqmovns.v4i16 intrinsic, returning the <4 x i16> result.
; The directives below expect the function label followed by vqmovn.s32.
define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
;CHECK: vqmovns32:
;CHECK: vqmovn.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %wide)
  ret <4 x i16> %narrow
}
; Loads a <2 x i64> vector through %A and passes it to the
; llvm.arm.neon.vqmovns.v2i32 intrinsic, returning the <2 x i32> result.
; The directives below expect the function label followed by vqmovn.s64.
define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
;CHECK: vqmovns64:
;CHECK: vqmovn.s64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %wide)
  ret <2 x i32> %narrow
}
; Loads an <8 x i16> vector through %A and passes it to the
; llvm.arm.neon.vqmovnu.v8i8 intrinsic, returning the <8 x i8> result.
; The directives below expect the function label followed by vqmovn.u16.
define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
;CHECK: vqmovnu16:
;CHECK: vqmovn.u16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %wide)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and passes it to the
; llvm.arm.neon.vqmovnu.v4i16 intrinsic, returning the <4 x i16> result.
; The directives below expect the function label followed by vqmovn.u32.
define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
;CHECK: vqmovnu32:
;CHECK: vqmovn.u32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %wide)
  ret <4 x i16> %narrow
}
; Loads a <2 x i64> vector through %A and passes it to the
; llvm.arm.neon.vqmovnu.v2i32 intrinsic, returning the <2 x i32> result.
; The directives below expect the function label followed by vqmovn.u64.
define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
;CHECK: vqmovnu64:
;CHECK: vqmovn.u64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %wide)
  ret <2 x i32> %narrow
}
; Loads an <8 x i16> vector through %A and passes it to the
; llvm.arm.neon.vqmovnsu.v8i8 intrinsic, returning the <8 x i8> result.
; The directives below expect the function label followed by vqmovun.s16.
define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
;CHECK: vqmovuns16:
;CHECK: vqmovun.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %wide)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and passes it to the
; llvm.arm.neon.vqmovnsu.v4i16 intrinsic, returning the <4 x i16> result.
; The directives below expect the function label followed by vqmovun.s32.
define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
;CHECK: vqmovuns32:
;CHECK: vqmovun.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %wide)
  ret <4 x i16> %narrow
}
define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
;CHECK: vqmovuns64:
;CHECK: vqmovun.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
ret <2 x i32> %tmp2

View File

@ -1,39 +1,48 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqneg\\.s8} %t | count 2
; RUN: grep {vqneg\\.s16} %t | count 2
; RUN: grep {vqneg\\.s32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Loads an <8 x i8> vector through %A, hands it to the
; llvm.arm.neon.vqneg.v8i8 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqneg.s8.
define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
;CHECK: vqnegs8:
;CHECK: vqneg.s8
  %vec = load <8 x i8>* %A
  %res = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %vec)
  ret <8 x i8> %res
}
; Loads a <4 x i16> vector through %A, hands it to the
; llvm.arm.neon.vqneg.v4i16 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqneg.s16.
define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
;CHECK: vqnegs16:
;CHECK: vqneg.s16
  %vec = load <4 x i16>* %A
  %res = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %vec)
  ret <4 x i16> %res
}
; Loads a <2 x i32> vector through %A, hands it to the
; llvm.arm.neon.vqneg.v2i32 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqneg.s32.
define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
;CHECK: vqnegs32:
;CHECK: vqneg.s32
  %vec = load <2 x i32>* %A
  %res = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %vec)
  ret <2 x i32> %res
}
; 128-bit (<16 x i8>) form: loads the vector through %A, hands it to the
; llvm.arm.neon.vqneg.v16i8 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqneg.s8.
define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
;CHECK: vqnegQs8:
;CHECK: vqneg.s8
  %vec = load <16 x i8>* %A
  %res = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %vec)
  ret <16 x i8> %res
}
; 128-bit (<8 x i16>) form: loads the vector through %A, hands it to the
; llvm.arm.neon.vqneg.v8i16 intrinsic and returns the intrinsic's result.
; The directives below expect the function label followed by vqneg.s16.
define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
;CHECK: vqnegQs16:
;CHECK: vqneg.s16
  %vec = load <8 x i16>* %A
  %res = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %vec)
  ret <8 x i16> %res
}
define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
;CHECK: vqnegQs32:
;CHECK: vqneg.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2

View File

@ -1,14 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqrshl\\.s8} %t | count 2
; RUN: grep {vqrshl\\.s16} %t | count 2
; RUN: grep {vqrshl\\.s32} %t | count 2
; RUN: grep {vqrshl\\.s64} %t | count 2
; RUN: grep {vqrshl\\.u8} %t | count 2
; RUN: grep {vqrshl\\.u16} %t | count 2
; RUN: grep {vqrshl\\.u32} %t | count 2
; RUN: grep {vqrshl\\.u64} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshls8:
;CHECK: vqrshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -16,6 +10,8 @@ define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshls16:
;CHECK: vqrshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -23,6 +19,8 @@ define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshls32:
;CHECK: vqrshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -30,6 +28,8 @@ define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshls64:
;CHECK: vqrshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -37,6 +37,8 @@ define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshlu8:
;CHECK: vqrshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -44,6 +46,8 @@ define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshlu16:
;CHECK: vqrshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -51,6 +55,8 @@ define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshlu32:
;CHECK: vqrshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -58,6 +64,8 @@ define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshlu64:
;CHECK: vqrshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -65,6 +73,8 @@ define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQs8:
;CHECK: vqrshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -72,6 +82,8 @@ define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQs16:
;CHECK: vqrshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -79,6 +91,8 @@ define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQs32:
;CHECK: vqrshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -86,6 +100,8 @@ define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQs64:
;CHECK: vqrshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@ -93,6 +109,8 @@ define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQu8:
;CHECK: vqrshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -100,6 +118,8 @@ define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQu16:
;CHECK: vqrshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -107,6 +127,8 @@ define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQu32:
;CHECK: vqrshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -114,6 +136,8 @@ define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQu64:
;CHECK: vqrshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)

View File

@ -1,63 +1,72 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqrshrn\\.s16} %t | count 1
; RUN: grep {vqrshrn\\.s32} %t | count 1
; RUN: grep {vqrshrn\\.s64} %t | count 1
; RUN: grep {vqrshrn\\.u16} %t | count 1
; RUN: grep {vqrshrn\\.u32} %t | count 1
; RUN: grep {vqrshrn\\.u64} %t | count 1
; RUN: grep {vqrshrun\\.s16} %t | count 1
; RUN: grep {vqrshrun\\.s32} %t | count 1
; RUN: grep {vqrshrun\\.s64} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Loads an <8 x i16> vector through %A and calls
; llvm.arm.neon.vqrshiftns.v8i8 with it and a constant vector whose lanes
; are all -8, returning the <8 x i8> result.
; The directives below expect the function label followed by vqrshrn.s16.
define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
;CHECK: vqrshrns8:
;CHECK: vqrshrn.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and calls
; llvm.arm.neon.vqrshiftns.v4i16 with it and a constant vector whose lanes
; are all -16, returning the <4 x i16> result.
; The directives below expect the function label followed by vqrshrn.s32.
define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
;CHECK: vqrshrns16:
;CHECK: vqrshrn.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
; Loads a <2 x i64> vector through %A and calls
; llvm.arm.neon.vqrshiftns.v2i32 with it and a constant vector whose lanes
; are all -32, returning the <2 x i32> result.
; The directives below expect the function label followed by vqrshrn.s64.
define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
;CHECK: vqrshrns32:
;CHECK: vqrshrn.s64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %wide, <2 x i64> < i64 -32, i64 -32 >)
  ret <2 x i32> %narrow
}
; Loads an <8 x i16> vector through %A and calls
; llvm.arm.neon.vqrshiftnu.v8i8 with it and a constant vector whose lanes
; are all -8, returning the <8 x i8> result.
; The directives below expect the function label followed by vqrshrn.u16.
define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
;CHECK: vqrshrnu8:
;CHECK: vqrshrn.u16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and calls
; llvm.arm.neon.vqrshiftnu.v4i16 with it and a constant vector whose lanes
; are all -16, returning the <4 x i16> result.
; The directives below expect the function label followed by vqrshrn.u32.
define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
;CHECK: vqrshrnu16:
;CHECK: vqrshrn.u32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
; Loads a <2 x i64> vector through %A and calls
; llvm.arm.neon.vqrshiftnu.v2i32 with it and a constant vector whose lanes
; are all -32, returning the <2 x i32> result.
; The directives below expect the function label followed by vqrshrn.u64.
define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
;CHECK: vqrshrnu32:
;CHECK: vqrshrn.u64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %wide, <2 x i64> < i64 -32, i64 -32 >)
  ret <2 x i32> %narrow
}
; Loads an <8 x i16> vector through %A and calls
; llvm.arm.neon.vqrshiftnsu.v8i8 with it and a constant vector whose lanes
; are all -8, returning the <8 x i8> result.
; The directives below expect the function label followed by vqrshrun.s16.
define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
;CHECK: vqrshruns8:
;CHECK: vqrshrun.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Loads a <4 x i32> vector through %A and calls
; llvm.arm.neon.vqrshiftnsu.v4i16 with it and a constant vector whose lanes
; are all -16, returning the <4 x i16> result.
; The directives below expect the function label followed by vqrshrun.s32.
define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
;CHECK: vqrshruns16:
;CHECK: vqrshrun.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
;CHECK: vqrshruns32:
;CHECK: vqrshrun.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2

View File

@ -1,26 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqshl\\.s8} %t | count 4
; RUN: grep {vqshl\\.s16} %t | count 4
; RUN: grep {vqshl\\.s32} %t | count 4
; RUN: grep {vqshl\\.s64} %t | count 4
; RUN: grep {vqshl\\.u8} %t | count 4
; RUN: grep {vqshl\\.u16} %t | count 4
; RUN: grep {vqshl\\.u32} %t | count 4
; RUN: grep {vqshl\\.u64} %t | count 4
; RUN: grep {vqshl\\.s8.*#7} %t | count 2
; RUN: grep {vqshl\\.s16.*#15} %t | count 2
; RUN: grep {vqshl\\.s32.*#31} %t | count 2
; RUN: grep {vqshl\\.s64.*#63} %t | count 2
; RUN: grep {vqshl\\.u8.*#7} %t | count 2
; RUN: grep {vqshl\\.u16.*#15} %t | count 2
; RUN: grep {vqshl\\.u32.*#31} %t | count 2
; RUN: grep {vqshl\\.u64.*#63} %t | count 2
; RUN: grep {vqshlu\\.s8} %t | count 2
; RUN: grep {vqshlu\\.s16} %t | count 2
; RUN: grep {vqshlu\\.s32} %t | count 2
; RUN: grep {vqshlu\\.s64} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshls8:
;CHECK: vqshl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -28,6 +10,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshls16:
;CHECK: vqshl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -35,6 +19,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshls32:
;CHECK: vqshl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -42,6 +28,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshls64:
;CHECK: vqshl.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -49,6 +37,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshlu8:
;CHECK: vqshl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -56,6 +46,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshlu16:
;CHECK: vqshl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -63,6 +55,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshlu32:
;CHECK: vqshl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -70,6 +64,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshlu64:
;CHECK: vqshl.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -77,6 +73,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQs8:
;CHECK: vqshl.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -84,6 +82,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQs16:
;CHECK: vqshl.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -91,6 +91,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQs32:
;CHECK: vqshl.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -98,6 +100,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQs64:
;CHECK: vqshl.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@ -105,6 +109,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQu8:
;CHECK: vqshl.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -112,6 +118,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQu16:
;CHECK: vqshl.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -119,6 +127,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQu32:
;CHECK: vqshl.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -126,6 +136,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQu64:
;CHECK: vqshl.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@ -133,144 +145,192 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
; Immediate-shift test: loads an <8 x i8> from %A and passes it to
; llvm.arm.neon.vqshifts.v8i8 together with an all-7 constant shift vector.
; The expected instruction is vqshl.s8 carrying the immediate #7.
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshls_n8:
;CHECK: vqshl.s8{{.*#7}}
  %src = load <8 x i8>* %A
  %res = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %src, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %res
}
; Immediate-shift test: loads a <4 x i16> from %A and passes it to
; llvm.arm.neon.vqshifts.v4i16 together with an all-15 constant shift vector.
; The expected instruction is vqshl.s16 carrying the immediate #15.
define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshls_n16:
;CHECK: vqshl.s16{{.*#15}}
  %src = load <4 x i16>* %A
  %res = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %src, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %res
}
; Immediate-shift test: loads a <2 x i32> from %A and passes it to
; llvm.arm.neon.vqshifts.v2i32 together with an all-31 constant shift vector.
; The expected instruction is vqshl.s32 carrying the immediate #31.
define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshls_n32:
;CHECK: vqshl.s32{{.*#31}}
  %src = load <2 x i32>* %A
  %res = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %src, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %res
}
; Immediate-shift test: loads a <1 x i64> from %A and passes it to
; llvm.arm.neon.vqshifts.v1i64 together with a constant shift vector of 63.
; The expected instruction is vqshl.s64 carrying the immediate #63.
define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshls_n64:
;CHECK: vqshl.s64{{.*#63}}
  %src = load <1 x i64>* %A
  %res = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %src, <1 x i64> < i64 63 >)
  ret <1 x i64> %res
}
; Immediate-shift test: loads an <8 x i8> from %A and passes it to
; llvm.arm.neon.vqshiftu.v8i8 together with an all-7 constant shift vector.
; The expected instruction is vqshl.u8 carrying the immediate #7.
define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlu_n8:
;CHECK: vqshl.u8{{.*#7}}
  %src = load <8 x i8>* %A
  %res = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %src, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %res
}
; Immediate-shift test: loads a <4 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftu.v4i16 together with an all-15 constant shift vector.
; The expected instruction is vqshl.u16 carrying the immediate #15.
define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlu_n16:
;CHECK: vqshl.u16{{.*#15}}
  %src = load <4 x i16>* %A
  %res = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %src, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %res
}
; Immediate-shift test: loads a <2 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftu.v2i32 together with an all-31 constant shift vector.
; The expected instruction is vqshl.u32 carrying the immediate #31.
define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlu_n32:
;CHECK: vqshl.u32{{.*#31}}
  %src = load <2 x i32>* %A
  %res = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %src, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %res
}
; Immediate-shift test: loads a <1 x i64> from %A and passes it to
; llvm.arm.neon.vqshiftu.v1i64 together with a constant shift vector of 63.
; The expected instruction is vqshl.u64 carrying the immediate #63.
define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlu_n64:
;CHECK: vqshl.u64{{.*#63}}
  %src = load <1 x i64>* %A
  %res = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %src, <1 x i64> < i64 63 >)
  ret <1 x i64> %res
}
; Immediate-shift test: loads an <8 x i8> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v8i8 together with an all-7 constant shift vector.
; The expected instruction is vqshlu.s8; the pattern also requires the #7
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlsu_n8:
;CHECK: vqshlu.s8{{.*#7}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}
; Immediate-shift test: loads a <4 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v4i16 together with an all-15 constant shift vector.
; The expected instruction is vqshlu.s16; the pattern also requires the #15
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlsu_n16:
;CHECK: vqshlu.s16{{.*#15}}
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}
; Immediate-shift test: loads a <2 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v2i32 together with an all-31 constant shift vector.
; The expected instruction is vqshlu.s32; the pattern also requires the #31
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlsu_n32:
;CHECK: vqshlu.s32{{.*#31}}
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}
; Immediate-shift test: loads a <1 x i64> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v1i64 together with a constant shift vector of 63.
; The expected instruction is vqshlu.s64; the pattern also requires the #63
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlsu_n64:
;CHECK: vqshlu.s64{{.*#63}}
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}
; Immediate-shift test: loads a <16 x i8> from %A and passes it to
; llvm.arm.neon.vqshifts.v16i8 together with an all-7 constant shift vector.
; The expected instruction is vqshl.s8 carrying the immediate #7.
define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQs_n8:
;CHECK: vqshl.s8{{.*#7}}
  %src = load <16 x i8>* %A
  %res = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %src, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %res
}
; Immediate-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshifts.v8i16 together with an all-15 constant shift vector.
; The expected instruction is vqshl.s16 carrying the immediate #15.
define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQs_n16:
;CHECK: vqshl.s16{{.*#15}}
  %src = load <8 x i16>* %A
  %res = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %src, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %res
}
; Immediate-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshifts.v4i32 together with an all-31 constant shift vector.
; The expected instruction is vqshl.s32 carrying the immediate #31.
define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQs_n32:
;CHECK: vqshl.s32{{.*#31}}
  %src = load <4 x i32>* %A
  %res = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %src, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %res
}
; Immediate-shift test: loads a <2 x i64> from %A and passes it to
; llvm.arm.neon.vqshifts.v2i64 together with an all-63 constant shift vector.
; The expected instruction is vqshl.s64 carrying the immediate #63.
define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQs_n64:
;CHECK: vqshl.s64{{.*#63}}
  %src = load <2 x i64>* %A
  %res = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %src, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %res
}
; Immediate-shift test: loads a <16 x i8> from %A and passes it to
; llvm.arm.neon.vqshiftu.v16i8 together with an all-7 constant shift vector.
; The expected instruction is vqshl.u8 carrying the immediate #7.
define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQu_n8:
;CHECK: vqshl.u8{{.*#7}}
  %src = load <16 x i8>* %A
  %res = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %src, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %res
}
; Immediate-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftu.v8i16 together with an all-15 constant shift vector.
; The expected instruction is vqshl.u16 carrying the immediate #15.
define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQu_n16:
;CHECK: vqshl.u16{{.*#15}}
  %src = load <8 x i16>* %A
  %res = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %src, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %res
}
; Immediate-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftu.v4i32 together with an all-31 constant shift vector.
; The expected instruction is vqshl.u32 carrying the immediate #31.
define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQu_n32:
;CHECK: vqshl.u32{{.*#31}}
  %src = load <4 x i32>* %A
  %res = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %src, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %res
}
; Immediate-shift test: loads a <2 x i64> from %A and passes it to
; llvm.arm.neon.vqshiftu.v2i64 together with an all-63 constant shift vector.
; The expected instruction is vqshl.u64 carrying the immediate #63.
define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQu_n64:
;CHECK: vqshl.u64{{.*#63}}
  %src = load <2 x i64>* %A
  %res = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %src, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %res
}
; Immediate-shift test: loads a <16 x i8> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v16i8 together with an all-7 constant shift vector.
; The expected instruction is vqshlu.s8; the pattern also requires the #7
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQsu_n8:
;CHECK: vqshlu.s8{{.*#7}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}
; Immediate-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v8i16 together with an all-15 constant shift vector.
; The expected instruction is vqshlu.s16; the pattern also requires the #15
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQsu_n16:
;CHECK: vqshlu.s16{{.*#15}}
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}
; Immediate-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftsu.v4i32 together with an all-31 constant shift vector.
; The expected instruction is vqshlu.s32; the pattern also requires the #31
; immediate on the same line, matching the other constant-shift tests in this
; file, so that a wrongly encoded shift amount is detected.
define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQsu_n32:
;CHECK: vqshlu.s32{{.*#31}}
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}
define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQsu_n64:
;CHECK: vqshlu.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2

View File

@ -1,63 +1,72 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqshrn\\.s16} %t | count 1
; RUN: grep {vqshrn\\.s32} %t | count 1
; RUN: grep {vqshrn\\.s64} %t | count 1
; RUN: grep {vqshrn\\.u16} %t | count 1
; RUN: grep {vqshrn\\.u32} %t | count 1
; RUN: grep {vqshrn\\.u64} %t | count 1
; RUN: grep {vqshrun\\.s16} %t | count 1
; RUN: grep {vqshrun\\.s32} %t | count 1
; RUN: grep {vqshrun\\.s64} %t | count 1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Narrowing-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftns.v8i8 together with an all -8 constant shift vector,
; producing an <8 x i8>. The expected instruction is vqshrn.s16.
define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
;CHECK: vqshrns8:
;CHECK: vqshrn.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Narrowing-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftns.v4i16 together with an all -16 constant shift vector,
; producing a <4 x i16>. The expected instruction is vqshrn.s32.
define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
;CHECK: vqshrns16:
;CHECK: vqshrn.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
; Narrowing-shift test: loads a <2 x i64> from %A and passes it to
; llvm.arm.neon.vqshiftns.v2i32 together with an all -32 constant shift vector,
; producing a <2 x i32>. The expected instruction is vqshrn.s64.
define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
;CHECK: vqshrns32:
;CHECK: vqshrn.s64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %wide, <2 x i64> < i64 -32, i64 -32 >)
  ret <2 x i32> %narrow
}
; Narrowing-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftnu.v8i8 together with an all -8 constant shift vector,
; producing an <8 x i8>. The expected instruction is vqshrn.u16.
define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
;CHECK: vqshrnu8:
;CHECK: vqshrn.u16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Narrowing-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftnu.v4i16 together with an all -16 constant shift vector,
; producing a <4 x i16>. The expected instruction is vqshrn.u32.
define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
;CHECK: vqshrnu16:
;CHECK: vqshrn.u32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
; Narrowing-shift test: loads a <2 x i64> from %A and passes it to
; llvm.arm.neon.vqshiftnu.v2i32 together with an all -32 constant shift vector,
; producing a <2 x i32>. The expected instruction is vqshrn.u64.
define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
;CHECK: vqshrnu32:
;CHECK: vqshrn.u64
  %wide = load <2 x i64>* %A
  %narrow = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %wide, <2 x i64> < i64 -32, i64 -32 >)
  ret <2 x i32> %narrow
}
; Narrowing-shift test: loads an <8 x i16> from %A and passes it to
; llvm.arm.neon.vqshiftnsu.v8i8 together with an all -8 constant shift vector,
; producing an <8 x i8>. The expected instruction is vqshrun.s16.
define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
;CHECK: vqshruns8:
;CHECK: vqshrun.s16
  %wide = load <8 x i16>* %A
  %narrow = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %wide, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
  ret <8 x i8> %narrow
}
; Narrowing-shift test: loads a <4 x i32> from %A and passes it to
; llvm.arm.neon.vqshiftnsu.v4i16 together with an all -16 constant shift vector,
; producing a <4 x i16>. The expected instruction is vqshrun.s32.
define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
;CHECK: vqshruns16:
;CHECK: vqshrun.s32
  %wide = load <4 x i32>* %A
  %narrow = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %wide, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
  ret <4 x i16> %narrow
}
define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
;CHECK: vqshruns32:
;CHECK: vqshrun.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2

View File

@ -1,14 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vqsub\\.s8} %t | count 2
; RUN: grep {vqsub\\.s16} %t | count 2
; RUN: grep {vqsub\\.s32} %t | count 2
; RUN: grep {vqsub\\.s64} %t | count 2
; RUN: grep {vqsub\\.u8} %t | count 2
; RUN: grep {vqsub\\.u16} %t | count 2
; RUN: grep {vqsub\\.u32} %t | count 2
; RUN: grep {vqsub\\.u64} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqsubs8:
;CHECK: vqsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -16,6 +10,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqsubs16:
;CHECK: vqsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -23,6 +19,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqsubs32:
;CHECK: vqsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -30,6 +28,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqsubs64:
;CHECK: vqsub.s64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -37,6 +37,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqsubu8:
;CHECK: vqsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -44,6 +46,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqsubu16:
;CHECK: vqsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -51,6 +55,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqsubu32:
;CHECK: vqsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -58,6 +64,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqsubu64:
;CHECK: vqsub.u64
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@ -65,6 +73,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqsubQs8:
;CHECK: vqsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -72,6 +82,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqsubQs16:
;CHECK: vqsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -79,6 +91,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqsubQs32:
;CHECK: vqsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -86,6 +100,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqsubQs64:
;CHECK: vqsub.s64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@ -93,6 +109,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
}
define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqsubQu8:
;CHECK: vqsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -100,6 +118,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqsubQu16:
;CHECK: vqsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -107,6 +127,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqsubQu32:
;CHECK: vqsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -114,6 +136,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqsubQu64:
;CHECK: vqsub.u64
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)