diff --git a/llvm/test/CodeGen/ARM/vraddhn.ll b/llvm/test/CodeGen/ARM/vraddhn.ll
index b3f2f0d5e2b8..40e5cea99f63 100644
--- a/llvm/test/CodeGen/ARM/vraddhn.ll
+++ b/llvm/test/CodeGen/ARM/vraddhn.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vraddhn\\.i16} %t | count 1
-; RUN: grep {vraddhn\\.i32} %t | count 1
-; RUN: grep {vraddhn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -11,6 +10,8 @@ define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -18,6 +19,8 @@ define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/llvm/test/CodeGen/ARM/vrecpe.ll b/llvm/test/CodeGen/ARM/vrecpe.ll
index a97054fa927b..72e904e3d4ee 100644
--- a/llvm/test/CodeGen/ARM/vrecpe.ll
+++ b/llvm/test/CodeGen/ARM/vrecpe.ll
@@ -1,26 +1,32 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrecpe\\.u32} %t | count 2
-; RUN: grep {vrecpe\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
 
 define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
 define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
diff --git a/llvm/test/CodeGen/ARM/vrecps.ll b/llvm/test/CodeGen/ARM/vrecps.ll
index 5ddd60b336c8..71e8f867b414 100644
--- a/llvm/test/CodeGen/ARM/vrecps.ll
+++ b/llvm/test/CodeGen/ARM/vrecps.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrecps\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -9,6 +10,8 @@ define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
diff --git a/llvm/test/CodeGen/ARM/vrhadd.ll b/llvm/test/CodeGen/ARM/vrhadd.ll
index 6fa9f5e0e89c..d47dfc4dab1a 100644
--- a/llvm/test/CodeGen/ARM/vrhadd.ll
+++ b/llvm/test/CodeGen/ARM/vrhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrhadd\\.s8} %t | count 2
-; RUN: grep {vrhadd\\.s16} %t | count 2
-; RUN: grep {vrhadd\\.s32} %t | count 2
-; RUN: grep {vrhadd\\.u8} %t | count 2
-; RUN: grep {vrhadd\\.u16} %t | count 2
-; RUN: grep {vrhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
diff --git a/llvm/test/CodeGen/ARM/vrshl.ll b/llvm/test/CodeGen/ARM/vrshl.ll
index df051e9b16a8..48fd388e9a4c 100644
--- a/llvm/test/CodeGen/ARM/vrshl.ll
+++ b/llvm/test/CodeGen/ARM/vrshl.ll
@@ -1,22 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrshl\\.s8} %t | count 2
-; RUN: grep {vrshl\\.s16} %t | count 2
-; RUN: grep {vrshl\\.s32} %t | count 2
-; RUN: grep {vrshl\\.s64} %t | count 2
-; RUN: grep {vrshl\\.u8} %t | count 2
-; RUN: grep {vrshl\\.u16} %t | count 2
-; RUN: grep {vrshl\\.u32} %t | count 2
-; RUN: grep {vrshl\\.u64} %t | count 2
-; RUN: grep {vrshr\\.s8} %t | count 2
-; RUN: grep {vrshr\\.s16} %t | count 2
-; RUN: grep {vrshr\\.s32} %t | count 2
-; RUN: grep {vrshr\\.s64} %t | count 2
-; RUN: grep {vrshr\\.u8} %t | count 2
-; RUN: grep {vrshr\\.u16} %t | count 2
-; RUN: grep {vrshr\\.u32} %t | count 2
-; RUN: grep {vrshr\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -24,6 +10,8 @@ define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -31,6 +19,8 @@ define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -38,6 +28,8 @@ define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -45,6 +37,8 @@ define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -52,6 +46,8 @@ define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -59,6 +55,8 @@ define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -66,6 +64,8 @@ define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -73,6 +73,8 @@ define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -80,6 +82,8 @@ define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -87,6 +91,8 @@ define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -94,6 +100,8 @@ define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -101,6 +109,8 @@ define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -108,6 +118,8 @@ define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -115,6 +127,8 @@ define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -122,6 +136,8 @@ define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
@@ -129,96 +145,128 @@ define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
 
 define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
 
 define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
 
 define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
 
 define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
diff --git a/llvm/test/CodeGen/ARM/vrshrn.ll b/llvm/test/CodeGen/ARM/vrshrn.ll
index 3dd21bc176d2..b0dedef5e4a4 100644
--- a/llvm/test/CodeGen/ARM/vrshrn.ll
+++ b/llvm/test/CodeGen/ARM/vrshrn.ll
@@ -1,21 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrshrn\\.i16} %t | count 1
-; RUN: grep {vrshrn\\.i32} %t | count 1
-; RUN: grep {vrshrn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
diff --git a/llvm/test/CodeGen/ARM/vrsqrte.ll b/llvm/test/CodeGen/ARM/vrsqrte.ll
index 5eb9494db590..ef0aa8a0584d 100644
--- a/llvm/test/CodeGen/ARM/vrsqrte.ll
+++ b/llvm/test/CodeGen/ARM/vrsqrte.ll
@@ -1,26 +1,32 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrsqrte\\.u32} %t | count 2
-; RUN: grep {vrsqrte\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
 
 define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
 
 define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
 define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
diff --git a/llvm/test/CodeGen/ARM/vrsqrts.ll b/llvm/test/CodeGen/ARM/vrsqrts.ll
index 46a4ce93becf..99f4714f7fa4 100644
--- a/llvm/test/CodeGen/ARM/vrsqrts.ll
+++ b/llvm/test/CodeGen/ARM/vrsqrts.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrsqrts\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -9,6 +10,8 @@ define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
diff --git a/llvm/test/CodeGen/ARM/vrsubhn.ll b/llvm/test/CodeGen/ARM/vrsubhn.ll
index 4691783dc83d..cd7b99579bd6 100644
--- a/llvm/test/CodeGen/ARM/vrsubhn.ll
+++ b/llvm/test/CodeGen/ARM/vrsubhn.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vrsubhn\\.i16} %t | count 1
-; RUN: grep {vrsubhn\\.i32} %t | count 1
-; RUN: grep {vrsubhn\\.i64} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -11,6 +10,8 @@ define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -18,6 +19,8 @@ define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)