From 12b9e76b62e6ef9aba40bc26cc3f94d325342e10 Mon Sep 17 00:00:00 2001
From: David Majnemer
Date: Tue, 26 Jul 2016 05:52:37 +0000
Subject: [PATCH] Update for LLVM changes

InstSimplify has gained the ability to remove needless bitcasts which
perturbed some clang codegen tests.

llvm-svn: 276728
---
 clang/test/CodeGen/aarch64-neon-2velem.c      | 2063 ++---
 clang/test/CodeGen/aarch64-neon-across.c      |  112 +-
 clang/test/CodeGen/aarch64-neon-fma.c         |   16 +-
 clang/test/CodeGen/aarch64-neon-intrinsics.c  | 6632 +++++++----------
 clang/test/CodeGen/aarch64-neon-misc.c        | 1636 ++--
 clang/test/CodeGen/aarch64-neon-perm.c        | 1416 ++--
 .../aarch64-neon-scalar-x-indexed-elem.c      |    4 +-
 clang/test/CodeGen/aarch64-poly64.c           |   36 +-
 .../test/CodeGen/arm-neon-directed-rounding.c |   96 +-
 clang/test/CodeGen/arm-neon-fma.c             |   16 +-
 clang/test/CodeGen/arm-neon-numeric-maxmin.c  |   40 +-
 clang/test/CodeGen/arm-neon-vcvtX.c           |   64 +-
 12 files changed, 5018 insertions(+), 7113 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-neon-2velem.c b/clang/test/CodeGen/aarch64-neon-2velem.c
index 36500f62a5d9..2866990433df 100644
--- a/clang/test/CodeGen/aarch64-neon-2velem.c
+++ b/clang/test/CodeGen/aarch64-neon-2velem.c
@@ -4,7 +4,7 @@

 #include <arm_neon.h>

-// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -13,7 +13,7 @@ int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   return vmla_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -22,7 +22,7 @@ int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   return vmlaq_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -31,7 +31,7 @@ int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
   return vmla_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -40,7 +40,7 @@ int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
   return vmlaq_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -49,7 +49,7 @@ int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
   return vmla_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -58,7 +58,7 @@ int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
   return vmlaq_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -67,7 +67,7 @@ int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
   return vmla_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -76,7 +76,7 @@ int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
   return vmlaq_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -85,7 +85,7 @@ int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   return vmls_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -94,7 +94,7 @@ int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   return vmlsq_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -103,7 +103,7 @@ int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
   return vmls_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -112,7 +112,7 @@ int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
   return vmlsq_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -121,7 +121,7 @@ int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
   return vmls_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -130,7 +130,7 @@ int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
   return vmlsq_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -139,7 +139,7 @@ int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
   return vmls_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -148,7 +148,7 @@ int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
   return vmlsq_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i16> [[MUL]]
@@ -156,7 +156,7 @@ int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) {
   return vmul_lane_s16(a, v, 3);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <8 x i16> [[MUL]]
@@ -164,7 +164,7 @@ int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) {
   return vmulq_lane_s16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <2 x i32> [[MUL]]
@@ -172,7 +172,7 @@ int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) {
   return vmul_lane_s32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i32> [[MUL]]
@@ -180,7 +180,7 @@ int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) {
   return vmulq_lane_s32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i16> [[MUL]]
@@ -188,7 +188,7 @@ uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) {
   return vmul_lane_u16(a, v, 3);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <8 x i16> [[MUL]]
@@ -196,7 +196,7 @@ uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) {
   return vmulq_lane_u16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <2 x i32> [[MUL]]
@@ -204,7 +204,7 @@ uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) {
   return vmul_lane_u32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i32> [[MUL]]
@@ -212,7 +212,7 @@ uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) {
   return vmulq_lane_u32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i16> [[MUL]]
@@ -220,7 +220,7 @@ int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) {
   return vmul_laneq_s16(a, v, 7);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <8 x i16> [[MUL]]
@@ -228,7 +228,7 @@ int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) {
   return vmulq_laneq_s16(a, v, 7);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <2 x i32> [[MUL]]
@@ -236,7 +236,7 @@ int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) {
   return vmul_laneq_s32(a, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i32> [[MUL]]
@@ -244,7 +244,7 @@ int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) {
   return vmulq_laneq_s32(a, v, 3);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i16> [[MUL]]
@@ -252,7 +252,7 @@ uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) {
   return vmul_laneq_u16(a, v, 7);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32>
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
 // CHECK: ret <8 x i16> [[MUL]]
@@ -260,7 +260,7 @@ uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) {
   return vmulq_laneq_u16(a, v, 7);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <2 x i32> [[MUL]]
@@ -268,7 +268,7 @@ uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) {
   return vmul_laneq_u32(a, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32>
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
 // CHECK: ret <4 x i32> [[MUL]]
@@ -276,7 +276,7 @@ uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
   return vmulq_laneq_u32(a, v, 3);
 }

-// CHECK-LABEL: define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfma_lane_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
@@ -290,7 +290,7 @@ float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfma_lane_f32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_lane_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
@@ -304,7 +304,7 @@ float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmaq_lane_f32(a, b, v, 1);
 }

-// CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfma_laneq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
@@ -318,7 +318,7 @@ float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
   return vfma_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_laneq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
@@ -332,7 +332,7 @@ float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
   return vfmaq_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfms_lane_f32(
 // CHECK: [[SUB:%.*]] = fsub <2 x float> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
@@ -347,7 +347,7 @@ float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfms_lane_f32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_lane_f32(
 // CHECK: [[SUB:%.*]] = fsub <4 x float> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
@@ -362,7 +362,7 @@ float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmsq_lane_f32(a, b, v, 1);
 }

-// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfms_laneq_f32(
 // CHECK: [[SUB:%.*]] = fsub <2 x float> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
@@ -377,7 +377,7 @@ float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
   return vfms_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_laneq_f32(
 // CHECK: [[SUB:%.*]] = fsub <4 x float> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
@@ -392,7 +392,7 @@ float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
   return vfmsq_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_lane_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
@@ -406,7 +406,7 @@ float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
   return vfmaq_lane_f64(a, b, v, 0);
 }

-// CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_laneq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@@ -420,7 +420,7 @@ float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
   return vfmaq_laneq_f64(a, b, v, 1);
 }

-// CHECK-LABEL: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_lane_f64(
 // CHECK: [[SUB:%.*]] = fsub <2 x double> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
@@ -435,7 +435,7 @@ float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
   return vfmsq_lane_f64(a, b, v, 0);
 }

-// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_laneq_f64(
 // CHECK: [[SUB:%.*]] = fsub <2 x double> , %b
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
@@ -450,7 +450,7 @@ float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
   return vfmsq_laneq_f64(a, b, v, 1);
 }

-// CHECK-LABEL: define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmas_laneq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
 // CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -460,7 +460,7 @@ float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
   return vfmas_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmsd_lane_f64(
 // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
@@ -471,7 +471,7 @@ float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) {
   return vfmsd_lane_f64(a, b, v, 0);
 }

-// CHECK-LABEL: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmss_laneq_f32(
 // CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
@@ -482,7 +482,7 @@ float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
   return vfmss_laneq_f32(a, b, v, 3);
 }

-// CHECK-LABEL: define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmsd_laneq_f64(
 // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
@@ -493,991 +493,823 @@ float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
   return vfmsd_laneq_f64(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   return vmlal_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
   return vmlal_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vmlal_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vmlal_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_lane_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   return vmlal_high_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_lane_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
   return vmlal_high_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_laneq_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vmlal_high_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_laneq_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vmlal_high_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
   return vmlsl_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
   return vmlsl_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vmlsl_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vmlsl_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_lane_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
   return vmlsl_high_lane_s16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_lane_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
   return vmlsl_high_lane_s32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_laneq_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vmlsl_high_laneq_s16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_laneq_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vmlsl_high_laneq_s32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
   return vmlal_lane_u16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
   return vmlal_lane_u32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_laneq_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vmlal_laneq_u16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_laneq_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vmlal_laneq_u32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_lane_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
   return vmlal_high_lane_u16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_lane_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
   return vmlal_high_lane_u32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_laneq_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[ADD]]
 int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vmlal_high_laneq_u16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_high_laneq_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[ADD]]
 int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vmlal_high_laneq_u32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_lane_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
   return vmlsl_lane_u16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_lane_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
   return vmlsl_lane_u32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_laneq_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vmlsl_laneq_u16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_laneq_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vmlsl_laneq_u32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_lane_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
   return vmlsl_high_lane_u16(a, b, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_lane_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
   return vmlsl_high_lane_u32(a, b, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_laneq_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
 // CHECK: ret <4 x i32> [[SUB]]
 int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vmlsl_high_laneq_u16(a, b, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsl_high_laneq_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
 // CHECK: ret <2 x i64> [[SUB]]
 int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vmlsl_high_laneq_u32(a, b, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_lane_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) {
   return vmull_lane_s16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_lane_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: ret <2 x i64> [[VMULL2_I]]
 int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) {
   return vmull_lane_s32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_lane_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) {
   return vmull_lane_u16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_lane_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: ret <2 x i64> [[VMULL2_I]]
 uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) {
   return vmull_lane_u32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_high_lane_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) {
   return vmull_high_lane_s16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_high_lane_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: ret <2 x i64> [[VMULL2_I]]
 int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) {
   return vmull_high_lane_s32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_high_lane_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) {
   return vmull_high_lane_u16(a, v, 3);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_high_lane_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
 // CHECK: ret <2 x i64> [[VMULL2_I]]
 uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) {
   return vmull_high_lane_u32(a, v, 1);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_laneq_s16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) {
   return vmull_laneq_s16(a, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_laneq_s32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: ret <2 x i64> [[VMULL2_I]]
 int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) {
   return vmull_laneq_s32(a, v, 3);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmull_laneq_u16(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: ret <4 x i32> [[VMULL2_I]]
 uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) {
   return vmull_laneq_u16(a, v, 7);
 }

-// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmull_laneq_u32(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast
<2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { return vmull_laneq_u32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { return vmull_high_laneq_s16(a, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { return vmull_high_laneq_s32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { return vmull_high_laneq_u16(a, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 { +// 
CHECK-LABEL: @test_vmull_high_laneq_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { return vmull_high_laneq_u32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlal_lane_s16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlal_lane_s32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) 
#0 { +// CHECK-LABEL: @test_vqdmlal_high_lane_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlal_high_lane_s16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_high_lane_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlal_high_lane_s32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlsl_lane_s16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlsl_lane_s32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_lane_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlsl_high_lane_s16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_lane_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 
x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlsl_high_lane_s32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { return vqdmull_lane_s16(a, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { return vqdmull_lane_s32(a, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = 
bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { return vqdmull_laneq_s16(a, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { return vqdmull_laneq_s32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_lane_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { return vqdmull_high_lane_s16(a, v, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_lane_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) {
  return vqdmull_high_lane_s32(a, v, 1);
}
-// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmull_high_laneq_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
  return vqdmull_high_laneq_s16(a, v, 7);
}
-// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmull_high_laneq_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
  return vqdmull_high_laneq_s32(a, v, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmulh_lane_s16(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) {
  return vqdmulh_lane_s16(a, v, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmulhq_lane_s16(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
  return vqdmulhq_lane_s16(a, v, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmulh_lane_s32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) {
  return vqdmulh_lane_s32(a, v, 1);
}
-// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmulhq_lane_s32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
  return vqdmulhq_lane_s32(a, v, 1);
}
-// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulh_lane_s16(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) {
  return vqrdmulh_lane_s16(a, v, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
  return vqrdmulhq_lane_s16(a, v, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulh_lane_s32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) {
  return vqrdmulh_lane_s32(a, v, 1);
}
-// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
  return vqrdmulhq_lane_s32(a, v, 1);
}
-// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
// CHECK: ret <2 x float> [[MUL]]
@@ -1485,8 +1317,7 @@ float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) {
  return vmul_lane_f32(a, v, 1);
}
-
-// CHECK-LABEL: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
@@ -1495,20 +1326,21 @@ float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) {
// CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
// CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
// CHECK: ret <1 x double> [[TMP5]]
+
float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
  return vmul_lane_f64(a, v, 0);
}
-
-// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
// CHECK: ret <4 x float> [[MUL]]
+
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) {
  return vmulq_lane_f32(a, v, 1);
}
-// CHECK-LABEL: define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_f64(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]]
// CHECK: ret <2 x double> [[MUL]]
@@ -1516,7 +1348,7 @@ float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) {
  return vmulq_lane_f64(a, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
// CHECK: ret <2 x float> [[MUL]]
@@ -1524,7 +1356,7 @@ float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) {
  return vmul_laneq_f32(a, v, 3);
}
-// CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
@@ -1537,16 +1369,16 @@ float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) {
  return vmul_laneq_f64(a, v, 1);
}
-
-// CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
// CHECK: ret <4 x float> [[MUL]]
+
float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
  return vmulq_laneq_f32(a, v, 3);
}
-// CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_f64(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32>
// CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]]
// CHECK: ret <2 x double> [[MUL]]
@@ -1554,79 +1386,67 @@ float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) {
  return vmulq_laneq_f64(a, v, 1);
}
-// CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulx_lane_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) {
  return vmulx_lane_f32(a, v, 1);
}
-// CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulxq_lane_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) {
  return vmulxq_lane_f32(a, v, 1);
}
-// CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) #0 {
+// CHECK-LABEL: @test_vmulxq_lane_f64(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) {
  return vmulxq_lane_f64(a, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulx_laneq_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) {
  return vmulx_laneq_f32(a, v, 3);
}
-// CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vmulxq_laneq_f32(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) {
  return vmulxq_laneq_f32(a, v, 3);
}
-// CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vmulxq_laneq_f64(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32>
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2
+// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) {
  return vmulxq_laneq_f64(a, v, 1);
}
-// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -1635,7 +1455,7 @@ int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
  return vmla_lane_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -1644,7 +1464,7 @@ int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
  return vmlaq_lane_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -1653,7 +1473,7 @@ int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
  return vmla_lane_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -1662,7 +1482,7 @@ int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
  return vmlaq_lane_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -1671,7 +1491,7 @@ int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
  return vmla_laneq_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -1680,7 +1500,7 @@ int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
  return vmlaq_laneq_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -1689,7 +1509,7 @@ int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
  return vmla_laneq_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -1698,7 +1518,7 @@ int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
  return vmlaq_laneq_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -1707,7 +1527,7 @@ int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
  return vmls_lane_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -1716,7 +1536,7 @@ int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
  return vmlsq_lane_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -1725,7 +1545,7 @@ int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
  return vmls_lane_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -1734,7 +1554,7 @@ int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
  return vmlsq_lane_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -1743,7 +1563,7 @@ int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
  return vmls_laneq_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -1752,7 +1572,7 @@ int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
  return vmlsq_laneq_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -1761,7 +1581,7 @@ int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
  return vmls_laneq_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -1770,7 +1590,7 @@ int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
  return vmlsq_laneq_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
@@ -1778,7 +1598,7 @@ int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) {
  return vmul_lane_s16(a, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
@@ -1786,7 +1606,7 @@ int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) {
  return vmulq_lane_s16(a, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
@@ -1794,7 +1614,7 @@ int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) {
  return vmul_lane_s32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
@@ -1802,7 +1622,7 @@ int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) {
  return vmulq_lane_s32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_u16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
@@ -1810,7 +1630,7 @@ uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) {
  return vmul_lane_u16(a, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_u16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
@@ -1818,7 +1638,7 @@ uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) {
  return vmulq_lane_u16(a, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_lane_u32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
@@ -1826,7 +1646,7 @@ uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) {
  return vmul_lane_u32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_lane_u32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
@@ -1834,7 +1654,7 @@ uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) {
  return vmulq_lane_u32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
@@ -1842,7 +1662,7 @@ int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) {
  return vmul_laneq_s16(a, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
@@ -1850,7 +1670,7 @@ int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) {
  return vmulq_laneq_s16(a, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
@@ -1858,7 +1678,7 @@ int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) {
  return vmul_laneq_s32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
@@ -1866,7 +1686,7 @@ int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) {
  return vmulq_laneq_s32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_u16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
@@ -1874,7 +1694,7 @@ uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
  return vmul_laneq_u16(a, v, 0);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_u16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
@@ -1882,7 +1702,7 @@ uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
  return vmulq_laneq_u16(a, v, 0);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmul_laneq_u32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
@@ -1890,7 +1710,7 @@ uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
  return vmul_laneq_u32(a, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmulq_laneq_u32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
@@ -1898,7 +1718,7 @@ uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
  return vmulq_laneq_u32(a, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfma_lane_f32_0(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
@@ -1912,7 +1732,7 @@ float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vfma_lane_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_lane_f32_0(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
@@ -1926,7 +1746,7 @@ float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vfmaq_lane_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfma_laneq_f32_0(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
@@ -1940,7 +1760,7 @@ float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vfma_laneq_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_laneq_f32_0(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
@@ -1954,7 +1774,7 @@ float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
  return vfmaq_laneq_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfms_lane_f32_0(
// CHECK: [[SUB:%.*]] = fsub <2 x float> , %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
@@ -1969,7 +1789,7 @@ float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vfms_lane_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_lane_f32_0(
// CHECK: [[SUB:%.*]] = fsub <4 x float> , %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
@@ -1984,7 +1804,7 @@ float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vfmsq_lane_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfms_laneq_f32_0(
// CHECK: [[SUB:%.*]] = fsub <2 x float> , %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
@@ -1999,7 +1819,7 @@ float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vfms_laneq_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_laneq_f32_0(
// CHECK: [[SUB:%.*]] = fsub <4 x float> , %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
@@ -2014,7 +1834,7 @@ float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
  return vfmsq_laneq_f32(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmaq_laneq_f64_0(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@@ -2028,7 +1848,7 @@ float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v)
  return vfmaq_laneq_f64(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
+// CHECK-LABEL: @test_vfmsq_laneq_f64_0(
// CHECK: [[SUB:%.*]] = fsub <2 x double> , %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
@@ -2043,991 +1863,821 @@ float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v)
  return vfmsq_laneq_f64(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
  return vmlal_lane_s16(a, b, v, 0);
}
-// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlal_lane_s32_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
  return vmlal_lane_s32(a, b, v, 0);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlal_laneq_s16_0(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32>
@llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vmlal_laneq_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_laneq_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vmlal_laneq_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vmlal_high_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vmlal_high_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_laneq_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x 
i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vmlal_high_laneq_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_laneq_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return vmlal_high_laneq_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vmlsl_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vmlsl_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_laneq_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vmlsl_laneq_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_laneq_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vmlsl_laneq_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vmlsl_high_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> 
zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vmlsl_high_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_laneq_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vmlsl_high_laneq_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_laneq_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return vmlsl_high_laneq_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_lane_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call 
<4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vmlal_lane_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_lane_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vmlal_lane_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_laneq_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vmlal_laneq_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_laneq_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vmlal_laneq_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_lane_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x 
i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vmlal_high_lane_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_lane_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vmlal_high_lane_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_laneq_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vmlal_high_laneq_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlal_high_laneq_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return vmlal_high_laneq_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_lane_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vmlsl_lane_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_lane_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vmlsl_lane_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_laneq_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vmlsl_laneq_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_laneq_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x 
i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vmlsl_laneq_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_lane_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vmlsl_high_lane_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_lane_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vmlsl_high_lane_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_laneq_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, 
[[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vmlsl_high_laneq_u16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsl_high_laneq_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return vmlsl_high_laneq_u32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { return vmull_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { return vmull_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_lane_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x 
i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { return vmull_lane_u16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_lane_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { return vmull_lane_u32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { return vmull_high_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { return vmull_high_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_high_lane_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to 
<8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { return vmull_high_lane_u16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_high_lane_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { return vmull_high_lane_u32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_laneq_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { return vmull_laneq_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_laneq_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { return vmull_laneq_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_laneq_u16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // 
CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { return vmull_laneq_u16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_laneq_u32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { return vmull_laneq_u32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { return vmull_high_laneq_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { return vmull_high_laneq_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: 
@test_vmull_high_laneq_u16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { return vmull_high_laneq_u16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmull_high_laneq_u32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { return vmull_high_laneq_u32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlal_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // 
CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlal_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlal_high_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: 
[[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlal_high_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlsl_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlsl_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> 
[[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlsl_high_lane_s16(a, b, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlsl_high_lane_s32(a, b, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { return vqdmull_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { return vqdmull_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_laneq_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { return vqdmull_laneq_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_laneq_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { return vqdmull_laneq_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_lane_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> 
[[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { return vqdmull_high_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_lane_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { return vqdmull_high_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_laneq_s16_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { return vqdmull_high_laneq_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmull_high_laneq_s32_0( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { return vqdmull_high_laneq_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmulh_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { return vqdmulh_lane_s16(a, v, 0); } -// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmulhq_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { return vqdmulhq_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmulh_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULH_V2_I:%.*]] = 
call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { return vqdmulh_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmulhq_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { return vqdmulhq_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqrdmulh_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { return vqrdmulh_lane_s16(a, v, 0); } -// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vqrdmulhq_lane_s16_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 -// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 
x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { return vqrdmulhq_lane_s16(a, v, 0); } -// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqrdmulh_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { return vqrdmulh_lane_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vqrdmulhq_lane_s32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 -// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { return vqrdmulhq_lane_s32(a, v, 0); } -// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 { +// CHECK-LABEL: @test_vmul_lane_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] // CHECK: ret <2 x float> [[MUL]] @@ -3035,7 +2685,7 @@ float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { return vmul_lane_f32(a, v, 0); } -// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 { +// CHECK-LABEL: @test_vmulq_lane_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] // CHECK: ret <4 x float> [[MUL]] @@ -3043,7 +2693,7 @@ float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { return vmulq_lane_f32(a, v, 0); } -// CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x 
float> %v) #0 { +// CHECK-LABEL: @test_vmul_laneq_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] // CHECK: ret <2 x float> [[MUL]] @@ -3051,7 +2701,7 @@ float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { return vmul_laneq_f32(a, v, 0); } -// CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) #0 { +// CHECK-LABEL: @test_vmul_laneq_f64_0( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double @@ -3064,7 +2714,7 @@ float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { return vmul_laneq_f64(a, v, 0); } -// CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 { +// CHECK-LABEL: @test_vmulq_laneq_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] // CHECK: ret <4 x float> [[MUL]] @@ -3072,7 +2722,7 @@ float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { return vmulq_laneq_f32(a, v, 0); } -// CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 { +// CHECK-LABEL: @test_vmulq_laneq_f64_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer // CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] // CHECK: ret <2 x double> [[MUL]] @@ -3080,79 +2730,67 @@ float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { return vmulq_laneq_f64(a, v, 0); } -// CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 { +// CHECK-LABEL: @test_vmulx_lane_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { return vmulx_lane_f32(a, v, 0); } -// CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 { +// CHECK-LABEL: @test_vmulxq_lane_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { return vmulxq_lane_f32(a, v, 0); } -// 
CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) #0 { +// CHECK-LABEL: @test_vmulxq_lane_f64_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { return vmulxq_lane_f64(a, v, 0); } -// CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) #0 { +// CHECK-LABEL: @test_vmulx_laneq_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { return vmulx_laneq_f32(a, v, 0); } -// CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 { +// CHECK-LABEL: @test_vmulxq_laneq_f32_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { return vmulxq_laneq_f32(a, v, 0); } -// CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 { +// CHECK-LABEL: @test_vmulxq_laneq_f64_0( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 // CHECK: ret <2 x double> 
[[VMULX2_I]] float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { return vmulxq_laneq_f64(a, v, 0); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { +// CHECK-LABEL: @test_vmull_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 @@ -3160,29 +2798,25 @@ float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2 +// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: ret <4 x i32> [[VMULL5_I_I]] int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { return vmull_high_n_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmull_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2 +// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: ret <2 x i64> [[VMULL3_I_I]] int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { return vmull_high_n_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 { +// CHECK-LABEL: @test_vmull_high_n_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 @@ -3190,29 +2824,25 @@ int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2 +// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: ret <4 x i32> [[VMULL5_I_I]] 
uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { return vmull_high_n_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmull_high_n_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2 +// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: ret <2 x i64> [[VMULL3_I_I]] uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { return vmull_high_n_u32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { +// CHECK-LABEL: @test_vqdmull_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 @@ -3220,33 +2850,27 @@ uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V4_I_I]]) #2 +// CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: [[VQDMULL_V6_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V5_I_I]] int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { return vqdmull_high_n_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqdmull_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V2_I_I]]) #2 +// CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x 
i32> [[VECINIT1_I_I]]) #2 // CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V3_I_I]] int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { return vqdmull_high_n_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vmlal_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 @@ -3254,31 +2878,27 @@ int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vmlal_high_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlal_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vmlal_high_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vmlal_high_n_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 @@ -3286,31 +2906,27 @@ int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to 
<8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { return vmlal_high_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlal_high_n_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { return vmlal_high_n_u32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vqdmlal_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> @@ -3319,34 +2935,28 @@ uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2 -// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2 +// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V6_I_I]] int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vqdmlal_high_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: 
@test_vqdmlal_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2 -// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2 +// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V4_I_I]] int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vqdmlal_high_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vmlsl_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 @@ -3354,31 +2964,27 @@ int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vmlsl_high_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsl_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) 
#2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vmlsl_high_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vmlsl_high_n_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 @@ -3386,31 +2992,27 @@ int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { return vmlsl_high_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsl_high_n_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { return vmlsl_high_n_u32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_n_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> @@ -3419,34 +3021,28 @@ uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 
x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2 -// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2 +// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V6_I_I]] int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vqdmlsl_high_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_n_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2 -// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2 +// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V4_I_I]] int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vqdmlsl_high_n_s32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { +// CHECK-LABEL: @test_vmul_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]] @@ -3455,7 +3051,7 @@ float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { return vmul_n_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { +// CHECK-LABEL: @test_vmulq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2 @@ -3466,7 +3062,7 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { return vmulq_n_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 { +// CHECK-LABEL: @test_vmulq_n_f64( // CHECK: [[VECINIT_I:%.*]] = insertelement 
<2 x double> undef, double %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %b, i32 1
 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %a, [[VECINIT1_I]]
@@ -3475,22 +3071,19 @@ float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
   return vmulq_n_f64(a, b);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
+// CHECK-LABEL: @test_vfma_n_f32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
-// CHECK: ret <2 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> [[VECINIT1_I]], <2 x float> %a) #2
+// CHECK: ret <2 x float> [[TMP3]]
 float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
   return vfma_n_f32(a, b, n);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
+// CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
@@ -3498,32 +3091,26 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
-// CHECK: ret <4 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> [[VECINIT3_I]], <4 x float> %a) #2
+// CHECK: ret <4 x float> [[TMP3]]
 float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
   return vfmaq_n_f32(a, b, n);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
+// CHECK-LABEL: @test_vfms_n_f32(
 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
-// CHECK: ret <2 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> [[VECINIT1_I]], <2 x float> %a) #2
+// CHECK: ret <2 x float> [[TMP3]]
 float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
   return vfms_n_f32(a, b, n);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
+// CHECK-LABEL: @test_vfmsq_n_f32(
 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
@@ -3532,16 +3119,13 @@ float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
-// CHECK: ret <4 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> [[VECINIT3_I]], <4 x float> %a) #2
+// CHECK: ret <4 x float> [[TMP3]]
 float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
   return vfmsq_n_f32(a, b, n);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmul_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
@@ -3552,7 +3136,7 @@ int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
   return vmul_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmulq_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
@@ -3567,7 +3151,7 @@ int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
   return vmulq_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmul_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
@@ -3576,7 +3160,7 @@ int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
   return vmul_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmulq_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
@@ -3587,7 +3171,7 @@ int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
   return vmulq_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmul_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
@@ -3598,7 +3182,7 @@ uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
   return vmul_n_u16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmulq_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
@@ -3613,7 +3197,7 @@ uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
   return vmulq_n_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmul_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
@@ -3622,7 +3206,7 @@ uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
   return vmul_n_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmulq_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
@@ -3633,112 +3217,95 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
   return vmulq_n_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmull_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2
+// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: ret <4 x i32> [[VMULL5_I]]
 int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
   return vmull_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmull_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2
+// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: ret <2 x i64> [[VMULL3_I]]
 int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
   return vmull_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vmull_n_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2
+// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: ret <4 x i32> [[VMULL5_I]]
 uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
   return vmull_n_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vmull_n_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2
+// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: ret <2 x i64> [[VMULL3_I]]
 uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
   return vmull_n_u32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqdmull_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #2
+// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULL_V5_I]]
 int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
   return vqdmull_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqdmull_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #2
+// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQDMULL_V3_I]]
 int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
   return vqdmull_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqdmulh_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #2
+// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQDMULH_V5_I]]
 int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
   return vqdmulh_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqdmulhq_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
@@ -3749,66 +3316,54 @@ int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #2
+// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #2
 // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]]
 int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
   return vqdmulhq_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqdmulh_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #2
+// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQDMULH_V3_I]]
 int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
   return vqdmulh_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqdmulhq_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #2
+// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #2
 // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]]
 int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
   return vqdmulhq_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulh_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #2
+// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]]
 int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
   return vqrdmulh_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
@@ -3819,49 +3374,40 @@ int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #2
+// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #2
 // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]]
 int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
   return vqrdmulhq_n_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulh_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #2
+// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]]
 int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
   return vqrdmulh_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #2
+// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #2
 // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]]
 int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
   return vqrdmulhq_n_s32(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmla_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -3873,7 +3419,7 @@ int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
   return vmla_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlaq_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -3889,7 +3435,7 @@ int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
   return vmlaq_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmla_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
@@ -3899,7 +3445,7 @@ int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
   return vmla_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlaq_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
@@ -3911,7 +3457,7 @@ int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
   return vmlaq_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmla_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -3923,7 +3469,7 @@ uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
   return vmla_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlaq_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -3939,7 +3485,7 @@ uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
   return vmlaq_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmla_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
@@ -3949,7 +3495,7 @@ uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
   return vmla_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlaq_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
@@ -3961,67 +3507,59 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
   return vmlaq_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlal_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
 // CHECK: ret <4 x i32> [[ADD_I]]
 int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
   return vmlal_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlal_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
 // CHECK: ret <2 x i64> [[ADD_I]]
 int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
   return vmlal_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlal_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
 // CHECK: ret <4 x i32> [[ADD_I]]
 uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
   return vmlal_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlal_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
 // CHECK: ret <2 x i64> [[ADD_I]]
 uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
   return vmlal_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vqdmlal_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
@@ -4029,33 +3567,27 @@ uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2
+// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
+// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
 int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
   return vqdmlal_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vqdmlal_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2
+// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
+// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #2
 // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
 int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
   return vqdmlal_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmls_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -4067,7 +3599,7 @@ int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
   return vmls_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlsq_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -4083,7 +3615,7 @@ int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
   return vmlsq_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmls_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
@@ -4093,7 +3625,7 @@ int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
   return vmls_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlsq_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
@@ -4105,7 +3637,7 @@ int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
   return vmlsq_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmls_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -4117,7 +3649,7 @@ uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
   return vmls_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlsq_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
@@ -4133,7 +3665,7 @@ uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
   return vmlsq_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmls_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
@@ -4143,7 +3675,7 @@ uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
   return vmls_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlsq_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
@@ -4155,67 +3687,59 @@ uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
   return vmlsq_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlsl_n_s16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
   return vmlsl_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlsl_n_s32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
   return vmlsl_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vmlsl_n_u16(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
   return vmlsl_n_u16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vmlsl_n_u32(
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
   return vmlsl_n_u32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK-LABEL: @test_vqdmlsl_n_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
@@ -4223,33 +3747,27 @@ uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2
+// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2
+// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
 int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
   return vqdmlsl_n_s16(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK-LABEL: @test_vqdmlsl_n_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2
+// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2
+// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #2
 // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
 int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
   return vqdmlsl_n_s32(a, b, c);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -4258,7 +3776,7 @@ uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
   return vmla_lane_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -4267,7 +3785,7 @@ uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
   return vmlaq_lane_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_lane_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -4276,7 +3794,7 @@ uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
   return vmla_lane_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_lane_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -4285,7 +3803,7 @@ uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
   return vmlaq_lane_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
@@ -4294,7 +3812,7 @@ uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
   return vmla_laneq_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
@@ -4303,7 +3821,7 @@ uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
   return vmlaq_laneq_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmla_laneq_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
@@ -4312,7 +3830,7 @@ uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
   return vmla_laneq_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlaq_laneq_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
@@ -4321,69 +3839,57 @@ uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
   return vmlaq_laneq_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmlal_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
 int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vqdmlal_laneq_s16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmlal_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2
 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
 int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vqdmlal_laneq_s32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmlal_high_laneq_s16_0(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
 int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vqdmlal_high_laneq_s16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmlal_high_laneq_s32_0(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2
 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
 int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vqdmlal_high_laneq_s32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -4392,7 +3898,7 @@ uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
   return vmls_lane_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -4401,7 +3907,7 @@ uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
   return vmlsq_lane_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_lane_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -4410,7 +3916,7 @@ uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
   return vmls_lane_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_lane_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -4419,7 +3925,7 @@ uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
   return vmlsq_lane_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
@@ -4428,7 +3934,7 @@ uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
   return vmls_laneq_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_u16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
@@ -4437,7 +3943,7 @@ uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
   return vmlsq_laneq_u16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmls_laneq_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
@@ -4446,7 +3952,7 @@ uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
   return vmls_laneq_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vmlsq_laneq_u32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
 // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
@@ -4455,181 +3961,145 @@ uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
   return vmlsq_laneq_u32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmlsl_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
 int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
   return vqdmlsl_laneq_s16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmlsl_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
 int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
   return vqdmlsl_laneq_s32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmlsl_high_laneq_s16_0(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2
 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
 int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
   return vqdmlsl_high_laneq_s16(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmlsl_high_laneq_s32_0(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2
-// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2
 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
 int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
   return vqdmlsl_high_laneq_s32(a, b, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmulh_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
 int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) {
   return vqdmulh_laneq_s16(a, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqdmulhq_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2
 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
 int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) {
   return vqdmulhq_laneq_s16(a, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmulh_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
 int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) {
   return vqdmulh_laneq_s32(a, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqdmulhq_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2
 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
 int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) {
   return vqdmulhq_laneq_s32(a, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulh_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2
 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
 int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) {
   return vqrdmulh_laneq_s16(a, v, 0);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_laneq_s16_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2
 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
 int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) {
   return vqrdmulhq_laneq_s16(a, v, 0);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulh_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2
 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
 int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) {
   return vqrdmulh_laneq_s32(a, v, 0);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_laneq_s32_0(
 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { return vqrdmulhq_laneq_s32(a, v, 0); } -// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmla_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] @@ -4638,7 +4108,7 @@ uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { return vmla_lane_u16(a, b, v, 3); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlaq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] @@ -4647,7 +4117,7 @@ uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { return vmlaq_lane_u16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmla_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] @@ -4656,7 +4126,7 @@ uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { return vmla_lane_u32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlaq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] @@ -4665,7 +4135,7 @@ uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { return vmlaq_lane_u32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmla_laneq_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] @@ -4674,7 +4144,7 @@ uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { return vmla_laneq_u16(a, b, v, 7); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlaq_laneq_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] @@ -4683,7 +4153,7 @@ uint16x8_t 
test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { return vmlaq_laneq_u16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmla_laneq_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] @@ -4692,7 +4162,7 @@ uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { return vmla_laneq_u32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlaq_laneq_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] @@ -4701,69 +4171,57 @@ uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { return vmlaq_laneq_u32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlal_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t 
b, int32x4_t v) { return vqdmlal_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_high_laneq_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlal_high_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlal_high_laneq_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { return vqdmlal_high_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmls_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] @@ -4772,7 +4230,7 @@ uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { return vmls_lane_u16(a, b, v, 3); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> 
%v, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] @@ -4781,7 +4239,7 @@ uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { return vmlsq_lane_u16(a, b, v, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmls_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] @@ -4790,7 +4248,7 @@ uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { return vmls_lane_u32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] @@ -4799,7 +4257,7 @@ uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { return vmlsq_lane_u32(a, b, v, 1); } -// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmls_laneq_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] @@ -4808,7 +4266,7 @@ uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { return vmls_laneq_u16(a, b, v, 7); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vmlsq_laneq_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] @@ -4817,7 +4275,7 @@ uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { return vmlsq_laneq_u16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmls_laneq_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] @@ -4826,7 +4284,7 @@ uint32x2_t test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { return vmls_laneq_u32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vmlsq_laneq_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] @@ -4835,177 +4293,140 @@ uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { return vmlsq_laneq_u32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] 
to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlsl_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { return vqdmlsl_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_laneq_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlsl_high_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, 
<4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_laneq_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { return vqdmlsl_high_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmulh_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { return vqdmulh_laneq_s16(a, v, 7); } -// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqdmulhq_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { return vqdmulhq_laneq_s16(a, v, 7); } -// CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32(<2 x 
i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmulh_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { return vqdmulh_laneq_s32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqdmulhq_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { return vqdmulhq_laneq_s32(a, v, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqrdmulh_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { return vqrdmulh_laneq_s16(a, v, 7); } -// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { +// CHECK-LABEL: @test_vqrdmulhq_laneq_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: 
[[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { return vqrdmulhq_laneq_s16(a, v, 7); } -// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqrdmulh_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { return vqrdmulh_laneq_s32(a, v, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { +// CHECK-LABEL: @test_vqrdmulhq_laneq_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { return vqrdmulhq_laneq_s32(a, v, 3); } - diff --git a/clang/test/CodeGen/aarch64-neon-across.c b/clang/test/CodeGen/aarch64-neon-across.c index 04a7b26e8a27..6d7a0d5bcde4 100644 --- a/clang/test/CodeGen/aarch64-neon-across.c +++ b/clang/test/CodeGen/aarch64-neon-across.c @@ -14,9 +14,7 @@ int16_t test_vaddlv_s8(int8x8_t a) { } // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2 +// CHECK: [[VADDLV_I:%.*]] = 
// CHECK: ret i32 [[VADDLV_I]]
int32_t test_vaddlv_s16(int16x4_t a) {
  return vaddlv_s16(a);
@@ -31,9 +29,7 @@ uint16_t test_vaddlv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #2
// CHECK: ret i32 [[VADDLV_I]]
uint32_t test_vaddlv_u16(uint16x4_t a) {
  return vaddlv_u16(a);
@@ -48,18 +44,14 @@ int16_t test_vaddlvq_s8(int8x16_t a) {
}
// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #2
// CHECK: ret i32 [[VADDLV_I]]
int32_t test_vaddlvq_s16(int16x8_t a) {
  return vaddlvq_s16(a);
}
// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #2
// CHECK: ret i64 [[VADDLVQ_S32_I]]
int64_t test_vaddlvq_s32(int32x4_t a) {
  return vaddlvq_s32(a);
@@ -74,18 +66,14 @@ uint16_t test_vaddlvq_u8(uint8x16_t a) {
}
// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #2
// CHECK: ret i32 [[VADDLV_I]]
uint32_t test_vaddlvq_u16(uint16x8_t a) {
  return vaddlvq_u16(a);
}
// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #2
// CHECK: ret i64 [[VADDLVQ_U32_I]]
uint64_t test_vaddlvq_u32(uint32x4_t a) {
  return vaddlvq_u32(a);
@@ -100,9 +88,7 @@ int8_t test_vmaxv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vmaxv_s16(int16x4_t a) {
@@ -118,9 +104,7 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vmaxv_u16(uint16x4_t a) {
@@ -136,9 +120,7 @@ int8_t test_vmaxvq_s8(int8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vmaxvq_s16(int16x8_t a) {
@@ -146,9 +128,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VMAXVQ_S32_I]]
int32_t test_vmaxvq_s32(int32x4_t a) {
  return vmaxvq_s32(a);
@@ -163,9 +143,7 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vmaxvq_u16(uint16x8_t a) {
@@ -173,9 +151,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VMAXVQ_U32_I]]
uint32_t test_vmaxvq_u32(uint32x4_t a) {
  return vmaxvq_u32(a);
@@ -190,9 +166,7 @@ int8_t test_vminv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vminv_s16(int16x4_t a) {
@@ -208,9 +182,7 @@ uint8_t test_vminv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vminv_u16(uint16x4_t a) {
@@ -226,9 +198,7 @@ int8_t test_vminvq_s8(int8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vminvq_s16(int16x8_t a) {
@@ -236,9 +206,7 @@ int16_t test_vminvq_s16(int16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VMINVQ_S32_I]]
int32_t test_vminvq_s32(int32x4_t a) {
  return vminvq_s32(a);
@@ -253,9 +221,7 @@ uint8_t test_vminvq_u8(uint8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vminvq_u16(uint16x8_t a) {
@@ -263,9 +229,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VMINVQ_U32_I]]
uint32_t test_vminvq_u32(uint32x4_t a) {
  return vminvq_u32(a);
@@ -280,9 +244,7 @@ int8_t test_vaddv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vaddv_s16(int16x4_t a) {
@@ -298,9 +260,7 @@ uint8_t test_vaddv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vaddv_u16(uint16x4_t a) {
@@ -316,9 +276,7 @@ int8_t test_vaddvq_s8(int8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vaddvq_s16(int16x8_t a) {
@@ -326,9 +284,7 @@ int16_t test_vaddvq_s16(int16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VADDVQ_S32_I]]
int32_t test_vaddvq_s32(int32x4_t a) {
  return vaddvq_s32(a);
@@ -343,9 +299,7 @@ uint8_t test_vaddvq_u8(uint8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
+// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #2
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vaddvq_u16(uint16x8_t a) {
@@ -353,45 +307,35 @@ uint16_t test_vaddvq_u16(uint16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
+// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #2
// CHECK: ret i32 [[VADDVQ_U32_I]]
uint32_t test_vaddvq_u32(uint32x4_t a) {
  return vaddvq_u32(a);
}
// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #2
// CHECK: ret float [[VMAXVQ_F32_I]]
float32_t test_vmaxvq_f32(float32x4_t a) {
  return vmaxvq_f32(a);
}
// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #2
// CHECK: ret float [[VMINVQ_F32_I]]
float32_t test_vminvq_f32(float32x4_t a) {
  return vminvq_f32(a);
}
// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #2
// CHECK: ret float [[VMAXNMVQ_F32_I]]
float32_t test_vmaxnmvq_f32(float32x4_t a) {
  return vmaxnmvq_f32(a);
}
// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #2
// CHECK: ret float [[VMINNMVQ_F32_I]]
float32_t test_vminnmvq_f32(float32x4_t a) {
  return vminnmvq_f32(a);
diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c
index 836321af0609..6ada533c66ee 100644
--- a/clang/test/CodeGen/aarch64-neon-fma.c
+++ b/clang/test/CodeGen/aarch64-neon-fma.c
@@ -214,13 +214,7 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
+// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a)
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmaq_n_f64(a, b, c);
@@ -230,13 +224,7 @@ float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
+// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #2
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmsq_n_f64(a, b, c);
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index b087ce91e568..2ffbcdce372c 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -7,396 +7,392 @@
#include <arm_neon.h>
-// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vadd_s8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}
vadd_s8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vadd_s16( // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 // CHECK: ret <4 x i16> [[ADD_I]] int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { return vadd_s16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vadd_s32( // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 // CHECK: ret <2 x i32> [[ADD_I]] int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { return vadd_s32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 { +// CHECK-LABEL: @test_vadd_s64( // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 // CHECK: ret <1 x i64> [[ADD_I]] int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { return vadd_s64(v1, v2); } -// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vadd_f32( // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2 // CHECK: ret <2 x float> [[ADD_I]] float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { return vadd_f32(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vadd_u8( // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 // CHECK: ret <8 x i8> [[ADD_I]] uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { return vadd_u8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vadd_u16( // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 // CHECK: ret <4 x i16> [[ADD_I]] uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { return vadd_u16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vadd_u32( // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 // CHECK: ret <2 x i32> [[ADD_I]] uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { return vadd_u32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 { +// CHECK-LABEL: @test_vadd_u64( // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 // CHECK: ret <1 x i64> [[ADD_I]] uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { return vadd_u64(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vaddq_s8( // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 // CHECK: ret <16 x i8> [[ADD_I]] int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { return vaddq_s8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vaddq_s16( // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { return vaddq_s16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vaddq_s32( // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 // CHECK: ret <4 x i32> [[ADD_I]] -int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) { +int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) { return vaddq_s32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vaddq_s64( // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { return vaddq_s64(v1, v2); } -// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x 
float> %v2) #0 { +// CHECK-LABEL: @test_vaddq_f32( // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2 // CHECK: ret <4 x float> [[ADD_I]] float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { return vaddq_f32(v1, v2); } -// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vaddq_f64( // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2 // CHECK: ret <2 x double> [[ADD_I]] float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { return vaddq_f64(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vaddq_u8( // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 // CHECK: ret <16 x i8> [[ADD_I]] uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { return vaddq_u8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vaddq_u16( // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { return vaddq_u16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vaddq_u32( // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { return vaddq_u32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vaddq_u64( // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { return vaddq_u64(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vsub_s8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 // CHECK: ret <8 x i8> [[SUB_I]] int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { return vsub_s8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { + +// CHECK-LABEL: @test_vsub_s16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 // CHECK: ret <4 x i16> [[SUB_I]] int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { return vsub_s16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { + +// CHECK-LABEL: @test_vsub_s32( // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 // CHECK: ret <2 x i32> [[SUB_I]] int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { return vsub_s32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 { +// CHECK-LABEL: @test_vsub_s64( // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 // CHECK: ret <1 x i64> [[SUB_I]] int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { return vsub_s64(v1, v2); } -// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vsub_f32( // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2 // CHECK: ret <2 x float> [[SUB_I]] float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { return vsub_f32(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vsub_u8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 // CHECK: ret <8 x i8> [[SUB_I]] uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { return vsub_u8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vsub_u16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 // CHECK: ret <4 x 
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
return vsub_u16(v1, v2);
}
-// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vsub_u32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
return vsub_u32(v1, v2);
}
-// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vsub_u64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
return vsub_u64(v1, v2);
}
-// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_s8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
return vsubq_s8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_s16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
return vsubq_s16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_s32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
-int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) {
+int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
return vsubq_s32(v1, v2);
}
-// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_s64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
return vsubq_s64(v1, v2);
}
-// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_f32(
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
return vsubq_f32(v1, v2);
}
-// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_f64(
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
return vsubq_f64(v1, v2);
}
-// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_u8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
return vsubq_u8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_u16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
return vsubq_u16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_u32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
return vsubq_u32(v1, v2);
}
-// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vsubq_u64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
return vsubq_u64(v1, v2);
}
-// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vmul_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
return vmul_s8(v1, v2);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vmul_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
return vmul_s16(v1, v2);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vmul_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
return vmul_s32(v1, v2);
}
-// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vmul_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
return vmul_f32(v1, v2);
}
-
-// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vmul_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
return vmul_u8(v1, v2);
}
-// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vmul_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
return vmul_u16(v1, v2);
}
-// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vmul_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
return vmul_u32(v1, v2);
}
-// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
return vmulq_s8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
return vmulq_s16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
return vmulq_s32(v1, v2);
}
-
-// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+
+// CHECK-LABEL: @test_vmulq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
return vmulq_u8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
return vmulq_u16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
return vmulq_u32(v1, v2);
}
-// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
return vmulq_f32(v1, v2);
}
-// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
return vmulq_f64(v1, v2);
}
-// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vmul_p8(
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
- // test_vmul_p8
return vmul_p8(v1, v2);
- // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
-// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vmulq_p8(
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
- // test_vmulq_p8
return vmulq_p8(v1, v2);
- // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
-
-// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmla_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
@@ -404,7 +400,7 @@ int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vmla_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmla_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
@@ -413,7 +409,7 @@ int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return vmla_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmla_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
@@ -421,7 +417,7 @@ int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vmla_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK-LABEL: @test_vmla_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
@@ -429,7 +425,7 @@ float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmla_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmla_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
@@ -437,7 +433,7 @@ uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vmla_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmla_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
@@ -445,7 +441,7 @@ uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vmla_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmla_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
@@ -453,7 +449,7 @@ uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vmla_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
@@ -461,7 +457,7 @@ int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vmlaq_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
@@ -469,15 +465,15 @@ int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vmlaq_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vmlaq_s32(v1, v2, v3);
-}
+}
-// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
@@ -485,7 +481,7 @@ float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlaq_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
@@ -493,7 +489,7 @@ uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vmlaq_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
@@ -501,7 +497,7 @@ uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vmlaq_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
@@ -509,7 +505,7 @@ uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vmlaq_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK-LABEL: @test_vmlaq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
@@ -517,7 +513,7 @@ float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlaq_f64(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmls_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
@@ -525,7 +521,7 @@ int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vmls_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmls_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
@@ -534,7 +530,7 @@ int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return vmls_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmls_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
@@ -542,7 +538,7 @@ int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vmls_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK-LABEL: @test_vmls_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
@@ -550,7 +546,7 @@ float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmls_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vmls_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
@@ -558,7 +554,7 @@ uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vmls_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmls_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
@@ -566,14 +562,15 @@ uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vmls_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmls_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vmls_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+
+// CHECK-LABEL: @test_vmlsq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
@@ -581,7 +578,7 @@ int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vmlsq_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
@@ -589,7 +586,7 @@ int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vmlsq_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
@@ -597,14 +594,15 @@ int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vmlsq_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlsq_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+
+// CHECK-LABEL: @test_vmlsq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
@@ -612,7 +610,7 @@ uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vmlsq_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
@@ -620,7 +618,7 @@ uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vmlsq_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
@@ -628,115 +626,99 @@ uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vmlsq_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK-LABEL: @test_vmlsq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlsq_f64(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+
+// CHECK-LABEL: @test_vfma_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
-// CHECK: ret <2 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) #4
+// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfma_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK-LABEL: @test_vfmaq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
-// CHECK: ret <4 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) #4
+// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmaq_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK-LABEL: @test_vfmaq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
-// CHECK: ret <2 x double> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) #4
+// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmaq_f64(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+
+// CHECK-LABEL: @test_vfms_f32(
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
-// CHECK: ret <2 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) #4
+// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfms_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK-LABEL: @test_vfmsq_f32(
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
-// CHECK: ret <4 x float> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) #4
+// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmsq_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK-LABEL: @test_vfmsq_f64(
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
-// CHECK: ret <2 x double> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) #4
+// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmsq_f64(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vdivq_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
return vdivq_f64(v1, v2);
}
-// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vdivq_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
return vdivq_f32(v1, v2);
}
-// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vdiv_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
-// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vaba_s8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
@@ -744,31 +726,27 @@ int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vaba_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vaba_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return vaba_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vaba_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vaba_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vaba_u8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
@@ -776,31 +754,27 @@ uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vaba_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vaba_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vaba_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vaba_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vaba_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_s8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
@@ -808,31 +782,27 @@ int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vabaq_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vabaq_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vabaq_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_u8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
@@ -840,181 +810,154 @@ uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vabaq_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vabaq_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vabaq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vabaq_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vabd_s8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
return vabd_s8(v1, v2);
}
-// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vabd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
return vabd_s16(v1, v2);
}
-// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vabd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
return vabd_s32(v1, v2);
}
-// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vabd_u8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
return vabd_u8(v1, v2);
}
-// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vabd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
return vabd_u16(v1, v2);
}
-// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vabd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
return vabd_u32(v1, v2);
}
-// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vabd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) #4
// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
return vabd_f32(v1, v2);
}
-// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_s8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
return vabdq_s8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
return vabdq_s16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
return vabdq_s32(v1, v2);
}
-// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_u8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
return vabdq_u8(v1, v2);
}
-// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
return vabdq_u16(v1, v2);
}
-// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
return vabdq_u32(v1, v2);
}
-// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) #4
// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
return vabdq_f32(v1, v2);
}
-// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vabdq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) #4
// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
return vabdq_f64(v1, v2);
}
-
-// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
@@ -1024,16 +967,13 @@ int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vbsl_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP4]]
@@ -1041,39 +981,33 @@ int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
return vbsl_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vbsl_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
return vbsl_s64(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
@@ -1083,64 +1017,54 @@ uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vbsl_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vbsl_u16(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vbsl_u32(v1, v2, v3);
}
-// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
return vbsl_u64(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]]
+// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
@@ -1149,15 +1073,14 @@ float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vbsl_f32(v1, v2, v3);
}
-// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
@@ -1166,7 +1089,7 @@ float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
return vbsl_f64(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
@@ -1176,23 +1099,20 @@ poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
return vbsl_p8(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
return vbsl_p16(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
@@ -1202,55 +1122,46 @@ int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vbslq_s8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vbslq_s16(v1, v2, v3);
}
-// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vbslq_s32(v1, v2, v3);
}
-// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
-// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
return vbslq_s64(v1, v2, v3);
}
-// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
@@ -1260,63 +1171,53 @@ uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vbslq_u8(v1, v2, v3);
}
-// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { return vbslq_u16(v1, v2, v3); } -// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { +// CHECK-LABEL: @test_vbslq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i32> [[VBSL5_I]] uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { return vbslq_u32(v1, v2, v3); } -// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 { +// CHECK-LABEL: @test_vbslq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { return vbslq_u64(v1, v2, v3); } -// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 { +// CHECK-LABEL: @test_vbslq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]]
= xor <4 x i32> [[VBSL_I]], +// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> @@ -1325,7 +1226,7 @@ float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { return vbslq_f32(v1, v2, v3); } -// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { +// CHECK-LABEL: @test_vbslq_p8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 @@ -1335,31 +1236,27 @@ poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { return vbslq_p8(v1, v2, v3); } -// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { +// CHECK-LABEL: @test_vbslq_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { return vbslq_p16(v1, v2, v3); } -// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 { +// CHECK-LABEL: @test_vbslq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], +// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> @@ -1368,267 +1265,216 @@ float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { return vbslq_f64(v1, v2, v3); } -// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vrecps_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> 
[[VRECPS_V1_I]]) #4 -// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: ret <2 x float> [[VRECPS_V2_I]] float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { - return vrecps_f32(v1, v2); + return vrecps_f32(v1, v2); } -// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vrecpsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) #4 // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VRECPSQ_V2_I]] float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { - return vrecpsq_f32(v1, v2); + return vrecpsq_f32(v1, v2); } -// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vrecpsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) #4 // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double> -// CHECK: ret <2 x double> [[TMP2]] +// CHECK: ret <2 x double> [[VRECPSQ_V2_I]] float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { return vrecpsq_f64(v1, v2); } -// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vrsqrts_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4 +// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) #4 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VRSQRTS_V2_I]] float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { return vrsqrts_f32(v1, v2); } -// 
CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vrsqrtsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) #4 // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { return vrsqrtsq_f32(v1, v2); } -// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vrsqrtsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) #4 // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double> -// CHECK: ret <2 x double> [[TMP2]] +// CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]] float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { return vrsqrtsq_f64(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vcage_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) #4 // CHECK: ret <2 x i32> [[VCAGE_V2_I]] uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { return vcage_f32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vcage_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #4 // CHECK: ret <1 x i64> [[VCAGE_V2_I]] uint64x1_t 
test_vcage_f64(float64x1_t a, float64x1_t b) { return vcage_f64(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vcageq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) #4 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { return vcageq_f32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vcageq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) #4 // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]] uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { return vcageq_f64(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vcagt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) #4 // CHECK: ret <2 x i32> [[VCAGT_V2_I]] uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { return vcagt_f32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vcagt_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #4 // CHECK: ret <1 x i64> [[VCAGT_V2_I]] uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { return vcagt_f64(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vcagtq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x 
float> -// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) #4 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { return vcagtq_f32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vcagtq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) #4 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]] uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { return vcagtq_f64(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vcale_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) #4 // CHECK: ret <2 x i32> [[VCALE_V2_I]] uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { return vcale_f32(v1, v2); // Using registers other than v0, v1 is possible, but would be odd.
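// Illustrative note, not part of the original patch: the facge CHECK above
// encodes the operand reversal. vcale(a, b) has no instruction of its own,
// so codegen calls facge with the operands swapped (%v2 before %v1), testing
// |a| <= |b| as |b| >= |a|.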
} -// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vcale_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #4 // CHECK: ret <1 x i64> [[VCALE_V2_I]] uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { return vcale_f64(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vcaleq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) #4 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { return vcaleq_f32(v1, v2); // Using registers other than v0, v1 is possible, but would be odd. } -// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vcaleq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) #4 // CHECK: ret <2 x i64> [[VCALEQ_V2_I]] uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { return vcaleq_f64(v1, v2); // Using registers other than v0, v1 is possible, but would be odd. } -// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vcalt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) #4 // CHECK: ret <2 x i32> [[VCALT_V2_I]] uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { return vcalt_f32(v1, v2); // Using registers other than v0, v1 is possible, but would be odd.
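// Illustrative note, not part of the original patch: the strict form works
// the same way. vcalt(a, b) is checked as facgt(%v2, %v1), testing |a| < |b|
// as |b| > |a|.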
} -// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vcalt_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #4 // CHECK: ret <1 x i64> [[VCALT_V2_I]] uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { return vcalt_f64(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vcaltq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) #4 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { return vcaltq_f32(v1, v2); // Using registers other than v0, v1 is possible, but would be odd. } -// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vcaltq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) #4 // CHECK: ret <2 x i64> [[VCALTQ_V2_I]] uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { return vcaltq_f64(v1, v2); // Using registers other than v0, v1 is possible, but would be odd.
} -// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtst_s8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -1637,33 +1483,29 @@ uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { return vtst_s8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtst_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { return vtst_s16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vtst_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> +// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> // CHECK: ret <2 x i32> [[VTST_I]] uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { return vtst_s32(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtst_u8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -1672,33 +1514,29 @@ uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { return vtst_u8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtst_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { return vtst_u16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vtst_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> +// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> // CHECK: ret <2 x i32> [[VTST_I]] uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { return vtst_u32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtstq_s8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -1707,33 +1545,29 @@ uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { return vtstq_s8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtstq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { return vtstq_s16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vtstq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> // CHECK: ret <4 x i32> [[VTST_I]] uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { return vtstq_s32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtstq_u8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -1742,59 +1576,51 @@ uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { return vtstq_u8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtstq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast 
<16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { return vtstq_u16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vtstq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> // CHECK: ret <4 x i32> [[VTST_I]] uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { return vtstq_u32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vtstq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> // CHECK: ret <2 x i64> [[VTST_I]] uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { return vtstq_s64(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vtstq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> // CHECK: ret <2 x i64> [[VTST_I]] uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { return vtstq_u64(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtst_p8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -1803,20 +1629,18 @@ uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { return vtst_p8(v1, v2); } -// CHECK-LABEL: define <4 x 
i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtst_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { return vtst_p16(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vtstq_p8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -1825,46 +1649,40 @@ uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { return vtstq_p8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vtstq_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { return vtstq_p16(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vtst_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64> +// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> // CHECK: ret <1 x i64> [[VTST_I]] uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) { return vtst_s64(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vtst_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64> +// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b 
+// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> // CHECK: ret <1 x i64> [[VTST_I]] uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) { return vtst_u64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceq_s8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1872,7 +1690,7 @@ uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { return vceq_s8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vceq_s16( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1880,7 +1698,7 @@ uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { return vceq_s16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vceq_s32( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1888,7 +1706,7 @@ uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { return vceq_s32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vceq_s64( // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -1896,7 +1714,7 @@ uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { return vceq_s64(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vceq_u64( // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -1904,7 +1722,7 @@ uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { return vceq_u64(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vceq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1912,7 +1730,7 @@ uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { return vceq_f32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vceq_f64( // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -1920,7 +1738,7 @@ uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { return vceq_f64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceq_u8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1928,7 +1746,7 @@ uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { return vceq_u8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vceq_u16( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1936,7 +1754,7 @@ uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t 
v2) { return vceq_u16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vceq_u32( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1944,7 +1762,7 @@ uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { return vceq_u32(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceq_p8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1952,7 +1770,7 @@ uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { return vceq_p8(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceqq_s8( // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1960,7 +1778,7 @@ uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { return vceqq_s8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vceqq_s16( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1968,7 +1786,7 @@ uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { return vceqq_s16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vceqq_s32( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1976,7 +1794,7 @@ uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { return vceqq_s32(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vceqq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1984,7 +1802,7 @@ uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { return vceqq_f32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceqq_u8( // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1992,7 +1810,7 @@ uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { return vceqq_u8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vceqq_u16( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -2000,7 +1818,7 @@ uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { return vceqq_u16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vceqq_u32( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2008,7 +1826,7 @@ uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { return vceqq_u32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vceqq_p8( // CHECK: 
[[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -2016,8 +1834,7 @@ uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { return vceqq_p8(v1, v2); } - -// CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vceqq_s64( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] @@ -2025,7 +1842,7 @@ uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { return vceqq_s64(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vceqq_u64( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] @@ -2033,14 +1850,15 @@ uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { return vceqq_u64(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vceqq_f64( // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { return vceqq_f64(v1, v2); } -// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { + +// CHECK-LABEL: @test_vcge_s8( // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -2048,7 +1866,7 @@ uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { return vcge_s8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vcge_s16( // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -2056,7 +1874,7 @@ uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { return vcge_s16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vcge_s32( // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -2064,7 +1882,7 @@ uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { return vcge_s32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vcge_s64( // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -2072,7 +1890,7 @@ uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { return vcge_s64(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vcge_u64( // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -2080,7 +1898,7 @@ uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { return vcge_u64(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 { +// CHECK-LABEL: @test_vcge_f32( // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -2088,7 +1906,7 @@ uint32x2_t 
test_vcge_f32(float32x2_t v1, float32x2_t v2) { return vcge_f32(v1, v2); } -// CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vcge_f64( // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // CHECK: ret <1 x i64> [[SEXT_I]] @@ -2096,7 +1914,7 @@ uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { return vcge_f64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vcge_u8( // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -2104,7 +1922,7 @@ uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { return vcge_u8(v1, v2); } -// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { +// CHECK-LABEL: @test_vcge_u16( // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -2112,7 +1930,7 @@ uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { return vcge_u16(v1, v2); } -// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { +// CHECK-LABEL: @test_vcge_u32( // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -2120,7 +1938,7 @@ uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { return vcge_u32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_s8( // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -2128,7 +1946,7 @@ uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { return vcgeq_s8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_s16( // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -2136,7 +1954,7 @@ uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { return vcgeq_s16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_s32( // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2144,7 +1962,7 @@ uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { return vcgeq_s32(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2152,7 +1970,7 @@ uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { return vcgeq_f32(v1, v2); } -// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_u8( // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -2160,7 +1978,7 @@ uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { return vcgeq_u8(v1, v2); } -// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { +// 
CHECK-LABEL: @test_vcgeq_u16( // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -2168,7 +1986,7 @@ uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { return vcgeq_u16(v1, v2); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_u32( // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2176,7 +1994,7 @@ uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { return vcgeq_u32(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_s64( // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] @@ -2184,7 +2002,7 @@ uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { return vcgeq_s64(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_u64( // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] @@ -2192,7 +2010,7 @@ uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { return vcgeq_u64(v1, v2); } -// CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 { +// CHECK-LABEL: @test_vcgeq_f64( // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // CHECK: ret <2 x i64> [[SEXT_I]] @@ -2200,18 +2018,18 @@ uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { return vcgeq_f64(v1, v2); } -// Notes about vcle: -// LE condition predicate implemented as GE, so check reversed operands. -// Using registers other than v0, v1 are possible, but would be odd. -// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { +// CHECK-LABEL: @test_vcle_s8( // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] +// Notes about vcle: +// LE condition predicate implemented as GE, so check reversed operands. +// Using registers other than v0, v1 is possible, but would be odd.
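The relocated note above is the only prose in this stretch of the tests, and it is easiest to see with a one-lane scalar model. The sketch below is illustrative only: it is not part of the patch or of the test file, and the helper names facge_lane and vcale_lane are hypothetical. Plain vcle lowers to a bare icmp sle, as the CHECK lines here show; the reversal the note describes is the one the vcale/vcalt tests encode by calling facge/facgt with %v2 ahead of %v1.

#include <math.h>
#include <stdint.h>

/* Hypothetical one-lane model of facge: |a| >= |b|, returning the
 * all-ones/all-zeros lane mask that the vector comparisons produce. */
static uint32_t facge_lane(float a, float b) {
  return fabsf(a) >= fabsf(b) ? 0xffffffffu : 0u;
}

/* vcale has no instruction of its own: |a| <= |b| is obtained by calling
 * the GE form with the operands swapped, which is exactly the shape the
 * facge(%v2, %v1) CHECK lines in this file expect. */
static uint32_t vcale_lane(float a, float b) {
  return facge_lane(b, a);
}

For example, facge_lane(-2.0f, 1.0f) and vcale_lane(1.0f, -2.0f) both return the all-ones mask, since |-2| >= |1|.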
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
   return vcle_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcle_s16(
 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2219,7 +2037,7 @@ uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
   return vcle_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcle_s32(
 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2227,7 +2045,7 @@ uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
   return vcle_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vcle_s64(
 // CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2235,7 +2053,7 @@ uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
   return vcle_s64(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vcle_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2243,7 +2061,7 @@ uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
   return vcle_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vcle_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2251,7 +2069,7 @@ uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
   return vcle_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vcle_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2259,7 +2077,7 @@ uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
   return vcle_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcle_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[SEXT_I]]
@@ -2267,7 +2085,7 @@ uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
   return vcle_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcle_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2275,7 +2093,7 @@ uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
   return vcle_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcle_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2283,7 +2101,7 @@ uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
   return vcle_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_s8(
 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2291,7 +2109,7 @@ uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
   return vcleq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_s16(
 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2299,7 +2117,7 @@ uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
   return vcleq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_s32(
 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2307,7 +2125,7 @@ uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
   return vcleq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2315,7 +2133,7 @@ uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
   return vcleq_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2323,7 +2141,7 @@ uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vcleq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2331,7 +2149,7 @@ uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vcleq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2339,7 +2157,7 @@ uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vcleq_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_s64(
 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2347,7 +2165,7 @@ uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
   return vcleq_s64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2355,7 +2173,7 @@ uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
   return vcleq_u64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vcleq_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2363,8 +2181,7 @@ uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
   return vcleq_f64(v1, v2);
 }
 
-
-// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_s8(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[SEXT_I]]
@@ -2372,7 +2189,7 @@ uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
   return vcgt_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_s16(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2380,7 +2197,7 @@ uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
   return vcgt_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_s32(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2388,7 +2205,7 @@ uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
   return vcgt_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vcgt_s64(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2396,7 +2213,7 @@ uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
   return vcgt_s64(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vcgt_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2404,7 +2221,7 @@ uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
   return vcgt_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2412,7 +2229,7 @@ uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
   return vcgt_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vcgt_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2420,7 +2237,7 @@ uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
   return vcgt_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[SEXT_I]]
@@ -2428,7 +2245,7 @@ uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
   return vcgt_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2436,7 +2253,7 @@ uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
   return vcgt_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcgt_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2444,7 +2261,7 @@ uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
   return vcgt_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_s8(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2452,7 +2269,7 @@ uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
   return vcgtq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_s16(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2460,7 +2277,7 @@ uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
   return vcgtq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_s32(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2468,7 +2285,7 @@ uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
   return vcgtq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2476,7 +2293,7 @@ uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
   return vcgtq_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2484,7 +2301,7 @@ uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vcgtq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2492,7 +2309,7 @@ uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vcgtq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2500,7 +2317,7 @@ uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vcgtq_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_s64(
 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2508,7 +2325,7 @@ uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
   return vcgtq_s64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2516,7 +2333,7 @@ uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
   return vcgtq_u64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vcgtq_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2524,20 +2341,18 @@ uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
   return vcgtq_f64(v1, v2);
 }
 
-
-// Notes about vclt:
-// LT condition predicate implemented as GT, so check reversed operands.
-// Using registers other than v0, v1 are possible, but would be odd.
-
-// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vclt_s8(
 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[SEXT_I]]
+// Notes about vclt:
+// LT condition predicate implemented as GT, so check reversed operands.
+// Using registers other than v0, v1 are possible, but would be odd.
 uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
   return vclt_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vclt_s16(
 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2545,7 +2360,7 @@ uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
   return vclt_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vclt_s32(
 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2553,7 +2368,7 @@ uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
   return vclt_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vclt_s64(
 // CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2561,7 +2376,7 @@ uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
   return vclt_s64(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vclt_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2569,7 +2384,7 @@ uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
   return vclt_u64(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK-LABEL: @test_vclt_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2577,7 +2392,7 @@ uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
   return vclt_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vclt_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
 // CHECK: ret <1 x i64> [[SEXT_I]]
@@ -2585,7 +2400,7 @@ uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
   return vclt_f64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vclt_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[SEXT_I]]
@@ -2593,7 +2408,7 @@ uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
   return vclt_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vclt_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[SEXT_I]]
@@ -2601,7 +2416,7 @@ uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
   return vclt_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vclt_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[SEXT_I]]
@@ -2609,7 +2424,7 @@ uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
   return vclt_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_s8(
 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2617,7 +2432,7 @@ uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
   return vcltq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_s16(
 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2625,7 +2440,7 @@ uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
   return vcltq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_s32(
 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2633,7 +2448,7 @@ uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
   return vcltq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_f32(
 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2641,7 +2456,7 @@ uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
   return vcltq_f32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_u8(
 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
 // CHECK: ret <16 x i8> [[SEXT_I]]
@@ -2649,7 +2464,7 @@ uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vcltq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_u16(
 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[SEXT_I]]
@@ -2657,7 +2472,7 @@ uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vcltq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_u32(
 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[SEXT_I]]
@@ -2665,7 +2480,7 @@ uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vcltq_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_s64(
 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2673,7 +2488,7 @@ uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
   return vcltq_s64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_u64(
 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2681,7 +2496,7 @@ uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
   return vcltq_u64(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK-LABEL: @test_vcltq_f64(
 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[SEXT_I]]
@@ -2689,1513 +2504,1219 @@ uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
   return vcltq_f64(v1, v2);
 }
 
-
-// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_s8(
 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VHADD_V_I]]
 int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
   return vhadd_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VHADD_V2_I]]
 int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
   return vhadd_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VHADD_V2_I]]
 int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
   return vhadd_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_u8(
 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VHADD_V_I]]
 uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
   return vhadd_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VHADD_V2_I]]
 uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
   return vhadd_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhadd_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VHADD_V2_I]]
 uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
   return vhadd_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_s8(
 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
 int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
   return vhaddq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
 int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
   return vhaddq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
 int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
   return vhaddq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_u8(
 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
 uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vhaddq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
 uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vhaddq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhaddq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
 uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vhaddq_u32(v1, v2);
 }
 
-
-// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_s8(
 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
 int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
   return vhsub_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
 int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
   return vhsub_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
 int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
   return vhsub_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_u8(
 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
 uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
   return vhsub_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
 uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
   return vhsub_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhsub_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
 uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
   return vhsub_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_s8(
 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
 int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
   return vhsubq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
 int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
   return vhsubq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
 int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
   return vhsubq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_u8(
 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
 uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vhsubq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
 uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vhsubq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vhsubq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
 uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vhsubq_u32(v1, v2);
 }
 
-
-// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_s8(
 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
 int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
   return vrhadd_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
 int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
   return vrhadd_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
 int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
   return vrhadd_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_u8(
 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
 uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
   return vrhadd_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4
 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
 uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
   return vrhadd_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vrhadd_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4
 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
 uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
   return vrhadd_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_s8(
 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
 int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
   return vrhaddq_s8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
 int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
   return vrhaddq_s16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
 int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
   return vrhaddq_s32(v1, v2);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_u8(
 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
 uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
   return vrhaddq_u8(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4
 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
 uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
   return vrhaddq_u16(v1, v2);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK-LABEL: @test_vrhaddq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4
 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
 uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
   return vrhaddq_u32(v1, v2);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vqadd_s8(
 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQADD_V_I]]
 int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
   return vqadd_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqadd_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQADD_V2_I]]
 int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
   return vqadd_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqadd_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQADD_V2_I]]
 int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
   return vqadd_s32(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqadd_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQADD_V2_I]]
 int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
   return vqadd_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqadd_u8(
 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQADD_V_I]]
 uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
   return vqadd_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqadd_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQADD_V2_I]]
 uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
   return vqadd_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqadd_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
   return vqadd_u32(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqadd_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQADD_V2_I]]
 uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
   return vqadd_u64(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_s8(
 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
 int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
   return vqaddq_s8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
 int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
   return vqaddq_s16(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
 int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
   return vqaddq_s32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
 int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
   return vqaddq_s64(a, b);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_u8(
 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQADDQ_V_I]]
 uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
   return vqaddq_u8(a, b);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
 uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
   return vqaddq_u16(a, b);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
 uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
   return vqaddq_u32(a, b);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqaddq_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
 uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
   return vqaddq_u64(a, b);
 }
 
-
-// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqsub_s8(
 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
 int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
   return vqsub_s8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqsub_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
 int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
   return vqsub_s16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqsub_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
 int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
   return vqsub_s32(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqsub_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
 int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
   return vqsub_s64(a, b);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqsub_u8(
 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQSUB_V_I]]
 uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
   return vqsub_u8(a, b);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqsub_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
 uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
   return vqsub_u16(a, b);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqsub_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
 uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
   return vqsub_u32(a, b);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqsub_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQSUB_V2_I:%.*]] = 
call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { return vqsub_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s8( // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { return vqsubq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { return vqsubq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret 
<2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { return vqsubq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u8( // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { return vqsubq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { return vqsubq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { return vqsubq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vshl_s8( // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VSHL_V_I]] int8x8_t test_vshl_s8(int8x8_t a, 
int8x8_t b) { return vshl_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VSHL_V2_I]] int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { return vshl_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VSHL_V2_I]] int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { return vshl_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VSHL_V2_I]] int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { return vshl_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vshl_u8( // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VSHL_V_I]] uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { return vshl_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 
+// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VSHL_V2_I]] uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { return vshl_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VSHL_V2_I]] uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { return vshl_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VSHL_V2_I]] uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { return vshl_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vshlq_s8( // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VSHLQ_V_I]] int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { return vshlq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VSHLQ_V2_I]] int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { return vshlq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// 
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
 int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
   return vshlq_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vshlq_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
 int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
   return vshlq_s64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vshlq_u8(
 // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VSHLQ_V_I]]
 uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
   return vshlq_u8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vshlq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
 uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
   return vshlq_u16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vshlq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
 uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
   return vshlq_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vshlq_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
 uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
   return vshlq_u64(a, b);
 }

-
-// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqshl_s8(
 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
 int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
   return vqshl_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqshl_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
   return vqshl_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqshl_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
 int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
   return vqshl_s32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqshl_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
 int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
   return vqshl_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqshl_u8(
 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
 uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
   return vqshl_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqshl_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
 uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
   return vqshl_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqshl_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
 uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
   return vqshl_u32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqshl_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
 uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
   return vqshl_u64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_s8(
 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
 int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
   return vqshlq_s8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
 int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
   return vqshlq_s16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
 int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
   return vqshlq_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
 int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
   return vqshlq_s64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_u8(
 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
 uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
   return vqshlq_u8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
 uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
   return vqshlq_u16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
 uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
   return vqshlq_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqshlq_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
 uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
   return vqshlq_u64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrshl_s8(
 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
 int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
   return vrshl_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrshl_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
 int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
   return vrshl_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrshl_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
 int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
   return vrshl_s32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrshl_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
 int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
   return vrshl_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrshl_u8(
 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
 uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
   return vrshl_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrshl_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
 uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
   return vrshl_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrshl_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
 uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
   return vrshl_u32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrshl_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
 uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
   return vrshl_u64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_s8(
 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
 int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
   return vrshlq_s8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
 int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
   return vrshlq_s16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
 int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
   return vrshlq_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
 int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
   return vrshlq_s64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_u8(
 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
 uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
   return vrshlq_u8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
 uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
   return vrshlq_u16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
 uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
   return vrshlq_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrshlq_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
 uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
   return vrshlq_u64(a, b);
 }

-
-// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_s8(
 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
 int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
   return vqrshl_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
 int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
   return vqrshl_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
 int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
   return vqrshl_s32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
 int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
   return vqrshl_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_u8(
 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
 uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
   return vqrshl_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
 uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
   return vqrshl_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
 uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
   return vqrshl_u32(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqrshl_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4
 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
 uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
   return vqrshl_u64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_s8(
 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
 int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
   return vqrshlq_s8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
 int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
   return vqrshlq_s16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
 int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
   return vqrshlq_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
 int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
   return vqrshlq_s64(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_u8(
 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
 uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
   return vqrshlq_u8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
 uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
   return vqrshlq_u16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
 uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
   return vqrshlq_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqrshlq_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
 uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
   return vqrshlq_u64(a, b);
 }

-// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_p64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
@@ -4203,10 +3724,10 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
 // CHECK: ret <1 x i64> [[VSLI_N2]]
 poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
-  return vsli_n_p64(a, b, 0);
+  return vsli_n_p64(a, b, 0);
 }

-// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_p64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -4214,1054 +3735,878 @@ poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
 // CHECK: ret <2 x i64> [[VSLI_N2]]
 poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
-  return vsliq_n_p64(a, b, 0);
+  return vsliq_n_p64(a, b, 0);
 }

-// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vmax_s8(
 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VMAX_I]]
 int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
   return vmax_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vmax_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VMAX2_I]]
 int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
   return vmax_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vmax_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VMAX2_I]]
 int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
   return vmax_s32(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vmax_u8(
 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VMAX_I]]
 uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
   return vmax_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vmax_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VMAX2_I]]
 uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
   return vmax_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vmax_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VMAX2_I]]
 uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
   return vmax_u32(a, b);
 }

-// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vmax_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b) #4
 // CHECK: ret <2 x float> [[VMAX2_I]]
 float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
   return vmax_f32(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vmaxq_s8(
 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VMAX_I]]
 int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
   return vmaxq_s8(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vmaxq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i16> [[VMAX2_I]]
 int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
   return vmaxq_s16(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vmaxq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: ret <4 x i32> [[VMAX2_I]]
 int32x4_t
test_vmaxq_s32(int32x4_t a, int32x4_t b) { return vmaxq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u8( // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMAX_I]] uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { return vmaxq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4 +// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i16> [[VMAX2_I]] uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { return vmaxq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: ret <4 x i32> [[VMAX2_I]] uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { return vmaxq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vmaxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x float> [[VMAX2_I]] float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { return vmaxq_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vmaxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4 +// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b) #4 // CHECK: ret <2 x double> [[VMAX2_I]] float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { return vmaxq_f64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmin_s8( // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) 
#4 // CHECK: ret <8 x i8> [[VMIN_I]] int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { return vmin_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i16> [[VMIN2_I]] int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { return vmin_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i32> [[VMIN2_I]] int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { return vmin_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmin_u8( // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMIN_I]] uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { return vmin_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i16> [[VMIN2_I]] uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { return vmin_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i32> [[VMIN2_I]] uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { return vmin_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to 
<2 x float> -// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: ret <2 x float> [[VMIN2_I]] float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { return vmin_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vminq_s8( // CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMIN_I]] int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { return vminq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vminq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i16> [[VMIN2_I]] int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { return vminq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: ret <4 x i32> [[VMIN2_I]] int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { return vminq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vminq_u8( // CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMIN_I]] uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { return vminq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4 +// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i16> [[VMIN2_I]] uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { return vminq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 
-// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: ret <4 x i32> [[VMIN2_I]]
 uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
   return vminq_u32(a, b);
 }
-// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vminq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
+// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b) #4
 // CHECK: ret <4 x float> [[VMIN2_I]]
 float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
   return vminq_f32(a, b);
 }
-// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vminq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
+// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b) #4
 // CHECK: ret <2 x double> [[VMIN2_I]]
 float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
   return vminq_f64(a, b);
 }
-// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vmaxnm_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
+// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #4
 // CHECK: ret <2 x float> [[VMAXNM2_I]]
 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
   return vmaxnm_f32(a, b);
 }
-// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vmaxnmq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
+// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #4
 // CHECK: ret <4 x float> [[VMAXNM2_I]]
 float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
   return vmaxnmq_f32(a, b);
 }
-// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vmaxnmq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
+// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) #4
 // CHECK: ret <2 x double> [[VMAXNM2_I]]
 float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
   return vmaxnmq_f64(a, b);
 }
-// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vminnm_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
+// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) #4
 // CHECK: ret <2 x float> [[VMINNM2_I]]
 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
   return vminnm_f32(a, b);
 }
-// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vminnmq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
+// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) #4
 // CHECK: ret <4 x float> [[VMINNM2_I]]
 float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
   return vminnmq_f32(a, b);
 }
-// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vminnmq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
+// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) #4
 // CHECK: ret <2 x double> [[VMINNM2_I]]
 float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
   return vminnmq_f64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmax_s8(
 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VPMAX_I]]
 int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
   return vpmax_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmax_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VPMAX2_I]]
 int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
   return vpmax_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmax_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VPMAX2_I]]
 int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
   return vpmax_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmax_u8(
 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VPMAX_I]]
 uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
   return vpmax_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmax_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VPMAX2_I]]
 uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
   return vpmax_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmax_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VPMAX2_I]]
 uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
   return vpmax_u32(a, b);
 }
-// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vpmax_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b) #4
 // CHECK: ret <2 x float> [[VPMAX2_I]]
 float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
   return vpmax_f32(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_s8(
 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VPMAX_I]]
 int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
   return vpmaxq_s8(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i16> [[VPMAX2_I]]
 int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
   return vpmaxq_s16(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: ret <4 x i32> [[VPMAX2_I]]
 int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
   return vpmaxq_s32(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_u8(
 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VPMAX_I]]
 uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
   return vpmaxq_u8(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i16> [[VPMAX2_I]]
 uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
   return vpmaxq_u16(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: ret <4 x i32> [[VPMAX2_I]]
 uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
   return vpmaxq_u32(a, b);
 }
-// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b) #4
 // CHECK: ret <4 x float> [[VPMAX2_I]]
 float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
   return vpmaxq_f32(a, b);
 }
-// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vpmaxq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b) #4
 // CHECK: ret <2 x double> [[VPMAX2_I]]
 float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
   return vpmaxq_f64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmin_s8(
 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VPMIN_I]]
 int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
   return vpmin_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmin_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VPMIN2_I]]
 int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
   return vpmin_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmin_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VPMIN2_I]]
 int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
   return vpmin_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpmin_u8(
 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: ret <8 x i8> [[VPMIN_I]]
 uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
   return vpmin_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpmin_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: ret <4 x i16> [[VPMIN2_I]]
 uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
   return vpmin_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vpmin_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: ret <2 x i32> [[VPMIN2_I]]
 uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
   return vpmin_u32(a, b);
 }
-// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vpmin_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b) #4
 // CHECK: ret <2 x float> [[VPMIN2_I]]
 float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
   return vpmin_f32(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vpminq_s8(
 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
 // CHECK: ret <16 x i8> [[VPMIN_I]]
 int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
   return vpminq_s8(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vpminq_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
+// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i16> [[VPMIN2_I]]
 int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
return vpminq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: ret <4 x i32> [[VPMIN2_I]] int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { return vpminq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpminq_u8( // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VPMIN_I]] uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { return vpminq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i16> [[VPMIN2_I]] uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { return vpminq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: ret <4 x i32> [[VPMIN2_I]] uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { return vpminq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vpminq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x float> [[VPMIN2_I]] float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { return vpminq_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vpminq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b 
to <16 x i8> -// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b) #4 // CHECK: ret <2 x double> [[VPMIN2_I]] float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { return vpminq_f64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpmaxnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: ret <2 x float> [[VPMAXNM2_I]] float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { return vpmaxnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vpmaxnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x float> [[VPMAXNM2_I]] float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { return vpmaxnmq_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vpmaxnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b) #4 // CHECK: ret <2 x double> [[VPMAXNM2_I]] float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { return vpmaxnmq_f64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpminnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b) #4 // 
CHECK: ret <2 x float> [[VPMINNM2_I]] float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { return vpminnm_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vpminnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x float> [[VPMINNM2_I]] float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { return vpminnmq_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vpminnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b) #4 // CHECK: ret <2 x double> [[VPMINNM2_I]] float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { return vpminnmq_f64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadd_s8( // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPADD_V_I]] int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { return vpadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPADD_V2_I]] int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { return vpadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x 
i32> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPADD_V2_I]] int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { return vpadd_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadd_u8( // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPADD_V_I]] uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { return vpadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPADD_V2_I]] uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { return vpadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPADD_V2_I]] uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { return vpadd_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { return vpadd_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpaddq_s8( // CHECK: 
[[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VPADDQ_V_I]] int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { return vpaddq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VPADDQ_V2_I]] int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { return vpaddq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VPADDQ_V2_I]] int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { return vpaddq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpaddq_u8( // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VPADDQ_V_I]] uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { return vpaddq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VPADDQ_V2_I]] uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { return vpaddq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VPADDQ_V2_I]] uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { return vpaddq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vpaddq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VPADDQ_V2_I]] float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { return vpaddq_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vpaddq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b) #4 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double> -// CHECK: ret <2 x double> [[TMP2]] +// CHECK: ret <2 x double> [[VPADDQ_V2_I]] float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { return vpaddq_f64(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x 
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { return vqdmulh_s16(a, b); }
-// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { return vqdmulh_s32(a, b); }
-// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { return vqdmulhq_s16(a, b); }
-// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { return vqdmulhq_s32(a, b); }
-// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { return vqrdmulh_s16(a, b); }
-// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { return vqrdmulh_s32(a, b); }
-// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { return vqrdmulhq_s16(a, b); }
-// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { return vqrdmulhq_s32(a, b); }
-// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vmulx_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
+// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b) #4
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { return vmulx_f32(a, b); }
-// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vmulxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
+// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b) #4
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { return vmulxq_f32(a, b); }
-// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vmulxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
+// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b) #4
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { return vmulxq_f64(a, b); }
-// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a,
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]],
@@ -5270,7 +4615,7 @@ int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]],
@@ -5279,14 +4624,14 @@ int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a,
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]],
@@ -5295,7 +4640,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]],
@@ -5304,7 +4649,7 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]],
@@ -5313,14 +4658,14 @@ int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a,
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_u8(int8x8_t a) { return vshl_n_u8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]],
@@ -5329,7 +4674,7 @@ int16x4_t test_vshl_n_u16(int16x4_t a) {
  return vshl_n_u16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]],
@@ -5338,14 +4683,14 @@ int32x2_t test_vshl_n_u32(int32x2_t a) {
  return vshl_n_u32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a,
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_u8(int8x16_t a) { return vshlq_n_u8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]],
@@ -5354,7 +4699,7 @@ int16x8_t test_vshlq_n_u16(int16x8_t a) {
  return vshlq_n_u16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]],
@@ -5363,7 +4708,7 @@ int32x4_t test_vshlq_n_u32(int32x4_t a) {
  return vshlq_n_u32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]],
@@ -5372,14 +4717,14 @@ int64x2_t test_vshlq_n_u64(int64x2_t a) {
  return vshlq_n_u64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a,
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]],
@@ -5388,7 +4733,7 @@ int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]],
@@ -5397,14 +4742,14 @@ int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a,
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]],
@@ -5413,7 +4758,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]],
@@ -5422,7 +4767,7 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]],
@@ -5431,14 +4776,14 @@ int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a,
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_u8(int8x8_t a) { return vshr_n_u8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]],
@@ -5447,7 +4792,7 @@ int16x4_t test_vshr_n_u16(int16x4_t a) {
  return vshr_n_u16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]],
@@ -5456,14 +4801,14 @@ int32x2_t test_vshr_n_u32(int32x2_t a) {
  return vshr_n_u32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a,
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_u8(int8x16_t a) { return vshrq_n_u8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]],
@@ -5472,7 +4817,7 @@ int16x8_t test_vshrq_n_u16(int16x8_t a) {
  return vshrq_n_u16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]],
@@ -5481,7 +4826,7 @@ int32x4_t test_vshrq_n_u32(int32x4_t a) {
  return vshrq_n_u32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]],
@@ -5490,7 +4835,7 @@ int64x2_t test_vshrq_n_u64(int64x2_t a) {
  return vshrq_n_u64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b,
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
@@ -5498,7 +4843,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -5510,7 +4855,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -5522,7 +4867,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b,
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
@@ -5530,7 +4875,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -5542,7 +4887,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -5554,7 +4899,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -5566,7 +4911,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b,
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
@@ -5574,7 +4919,7 @@ int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
  return vsra_n_u8(a, b, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -5586,7 +4931,7 @@ int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
  return vsra_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -5598,7 +4943,7 @@ int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
  return vsra_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b,
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
@@ -5606,7 +4951,7 @@ int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
  return vsraq_n_u8(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -5618,7 +4963,7 @@ int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
  return vsraq_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -5630,7 +4975,7 @@ int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
  return vsraq_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -5642,14 +4987,14 @@ int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
  return vsraq_n_u64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> )
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
@@ -5658,7 +5003,7 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
@@ -5667,14 +5012,14 @@ int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> )
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
@@ -5683,7 +5028,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
@@ -5692,7 +5037,7 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
@@ -5701,14 +5046,14 @@ int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> )
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_u8(int8x8_t a) { return vrshr_n_u8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
@@ -5717,7 +5062,7 @@ int16x4_t test_vrshr_n_u16(int16x4_t a) {
  return vrshr_n_u16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
@@ -5726,14 +5071,14 @@ int32x2_t test_vrshr_n_u32(int32x2_t a) {
  return vrshr_n_u32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> )
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_u8(int8x16_t a) { return vrshrq_n_u8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
@@ -5742,7 +5087,7 @@ int16x8_t test_vrshrq_n_u16(int16x8_t a) {
  return vrshrq_n_u16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
@@ -5751,7 +5096,7 @@ int32x4_t test_vrshrq_n_u32(int32x4_t a) {
  return vrshrq_n_u32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
@@ -5760,7 +5105,7 @@ int64x2_t test_vrshrq_n_u64(int64x2_t a) {
  return vrshrq_n_u64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> )
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
@@ -5768,7 +5113,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -5780,7 +5125,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -5792,7 +5137,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> )
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK: ret <16 x i8> [[TMP0]]
@@ -5800,7 +5145,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
@@ -5812,7 +5157,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
@@ -5824,7 +5169,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
@@ -5836,7 +5181,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> )
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
@@ -5844,7 +5189,7 @@ int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
  return vrsra_n_u8(a, b, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
@@ -5856,7 +5201,7 @@ int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
  return vrsra_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
@@ -5868,7 +5213,7 @@ int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> )
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK: ret <16 x i8> [[TMP0]]
@@ -5876,7 +5221,7 @@ int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
@@ -5888,7 +5233,7 @@ int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
@@ -5900,7 +5245,7 @@ int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
@@ -5912,14 +5257,14 @@ int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_s8(
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { return vsri_n_s8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -5930,7 +5275,7 @@ int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -5941,14 +5286,14 @@ int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_s8(
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { return vsriq_n_s8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -5959,7 +5304,7 @@ int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -5970,7 +5315,7 @@ int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -5981,14 +5326,14 @@ int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_u8(
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { return vsri_n_u8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -5999,7 +5344,7 @@ int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
  return vsri_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -6010,14 +5355,14 @@ int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
  return vsri_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_u8(
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { return vsriq_n_u8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -6028,7 +5373,7 @@ int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -6039,7 +5384,7 @@ int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -6050,14 +5395,14 @@ int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_p8(
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { return vsri_n_p8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsri_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -6068,14 +5413,14 @@ poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}
-// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_p8(
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { return vsriq_n_p8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsriq_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -6086,14 +5431,14 @@ poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
-// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { return vsli_n_s8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -6104,7 +5449,7 @@ int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -6115,14 +5460,14 @@ int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { return vsliq_n_s8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -6133,7 +5478,7 @@ int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -6144,7 +5489,7 @@ int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -6155,14 +5500,14 @@ int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { return vsli_n_u8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -6173,7 +5518,7 @@ uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -6184,14 +5529,14 @@ uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { return vsliq_n_u8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -6202,7 +5547,7 @@ uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -6213,7 +5558,7 @@ uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@@ -6224,14 +5569,14 @@ uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { return vsli_n_p8(a, b, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsli_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -6242,14 +5587,14 @@ poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}
-// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { return vsliq_n_p8(a, b, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -6260,14 +5605,14 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
-// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> )
// CHECK: ret <8 x i8> [[VQSHLU_N]]
int8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 3); }
-// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> )
@@ -6276,7 +5621,7 @@ int16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}
-// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> )
@@ -6285,14 +5630,14 @@ int32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}
-// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> )
// CHECK: ret <16 x i8> [[VQSHLU_N]]
int8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 3); }
-// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> )
@@ -6301,7 +5646,7 @@ int16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> )
@@ -6310,7 +5655,7 @@ int32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}
-// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> )
@@ -6319,7 +5664,7 @@ int64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
-// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]],
@@ -6329,7 +5674,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]],
@@ -6339,7 +5684,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}
-// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]],
@@ -6349,7 +5694,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}
-// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]],
@@ -6359,7 +5704,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]],
@@ -6369,7 +5714,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}
-// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]],
@@ -6379,7 +5724,7 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
-// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]],
@@ -6390,7 +5735,7 @@ int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]],
@@ -6401,7 +5746,7 @@ int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]],
@@ -6412,7 +5757,7 @@ int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]],
@@ -6423,7 +5768,7 @@ uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]],
@@ -6434,7 +5779,7 @@ uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}
-// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vshrn_high_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]],
@@ -6445,7 +5790,7 @@ uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
-// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
@@ -6454,7 +5799,7 @@ int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}
-// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
@@ -6463,7 +5808,7 @@ int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}
-// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
@@ -6472,7 +5817,7 @@ int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}
-// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vqshrun_high_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
@@ -6482,7 +5827,7 @@ int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vqshrun_high_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
@@ -6492,7 +5837,7 @@ int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vqshrun_high_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
@@ -6502,7 +5847,7 @@ int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
@@ -6511,7 +5856,7 @@ int8x8_t test_vrshrn_n_s16(int16x8_t a) {
return vrshrn_n_s16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) @@ -6520,7 +5865,7 @@ int16x4_t test_vrshrn_n_s32(int32x4_t a) { return vrshrn_n_s32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) @@ -6529,7 +5874,7 @@ int32x2_t test_vrshrn_n_s64(int64x2_t a) { return vrshrn_n_s64(a, 19); } -// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) @@ -6538,7 +5883,7 @@ uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { return vrshrn_n_u16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) @@ -6547,7 +5892,7 @@ uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { return vrshrn_n_u32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) @@ -6556,7 +5901,7 @@ uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { return vrshrn_n_u64(a, 19); } -// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) @@ -6566,7 +5911,7 @@ int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { return vrshrn_high_n_s16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) @@ -6576,7 +5921,7 @@ int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { return vrshrn_high_n_s32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> 
[[VRSHRN_N]], i32 19) @@ -6586,7 +5931,7 @@ int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { return vrshrn_high_n_s64(a, b, 19); } -// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) @@ -6596,7 +5941,7 @@ uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { return vrshrn_high_n_u16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) @@ -6606,7 +5951,7 @@ uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { return vrshrn_high_n_u32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshrn_high_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) @@ -6616,7 +5961,7 @@ uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { return vrshrn_high_n_u64(a, b, 19); } -// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) @@ -6625,7 +5970,7 @@ int8x8_t test_vqrshrun_n_s16(int16x8_t a) { return vqrshrun_n_s16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) @@ -6634,7 +5979,7 @@ int16x4_t test_vqrshrun_n_s32(int32x4_t a) { return vqrshrun_n_s32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) @@ -6643,7 +5988,7 @@ int32x2_t test_vqrshrun_n_s64(int64x2_t a) { return vqrshrun_n_s64(a, 19); } -// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshrun_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) @@ -6653,7 +5998,7 @@ int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { return vqrshrun_high_n_s16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 
x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshrun_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) @@ -6663,7 +6008,7 @@ int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { return vqrshrun_high_n_s32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshrun_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) @@ -6673,7 +6018,7 @@ int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { return vqrshrun_high_n_s64(a, b, 19); } -// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) @@ -6682,7 +6027,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { return vqshrn_n_s16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) @@ -6691,7 +6036,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { return vqshrn_n_s32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) @@ -6700,7 +6045,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { return vqshrn_n_s64(a, 19); } -// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) @@ -6709,7 +6054,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { return vqshrn_n_u16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) @@ -6718,7 +6063,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { return vqshrn_n_u32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) @@ -6727,7 +6072,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { return 
vqshrn_n_u64(a, 19); } -// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) @@ -6737,7 +6082,7 @@ int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { return vqshrn_high_n_s16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) @@ -6747,7 +6092,7 @@ int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { return vqshrn_high_n_s32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) @@ -6757,7 +6102,7 @@ int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { return vqshrn_high_n_s64(a, b, 19); } -// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) @@ -6767,7 +6112,7 @@ uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { return vqshrn_high_n_u16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) @@ -6777,7 +6122,7 @@ uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { return vqshrn_high_n_u32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshrn_high_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) @@ -6787,7 +6132,7 @@ uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { return vqshrn_high_n_u64(a, b, 19); } -// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) @@ -6796,7 +6141,7 @@ int8x8_t test_vqrshrn_n_s16(int16x8_t a) { return vqrshrn_n_s16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x 
i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) @@ -6805,7 +6150,7 @@ int16x4_t test_vqrshrn_n_s32(int32x4_t a) { return vqrshrn_n_s32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) @@ -6814,7 +6159,7 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { return vqrshrn_n_s64(a, 19); } -// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) @@ -6823,7 +6168,7 @@ uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { return vqrshrn_n_u16(a, 3); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) @@ -6832,7 +6177,7 @@ uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { return vqrshrn_n_u32(a, 9); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) @@ -6841,7 +6186,7 @@ uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { return vqrshrn_n_u64(a, 19); } -// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) @@ -6851,7 +6196,7 @@ int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { return vqrshrn_high_n_s16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) @@ -6861,7 +6206,7 @@ int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { return vqrshrn_high_n_s32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) @@ -6871,7 +6216,7 @@ int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { return 
vqrshrn_high_n_s64(a, b, 19); } -// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) @@ -6881,7 +6226,7 @@ uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { return vqrshrn_high_n_u16(a, b, 3); } -// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) @@ -6891,7 +6236,7 @@ uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { return vqrshrn_high_n_u32(a, b, 9); } -// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshrn_high_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) @@ -6901,7 +6246,7 @@ uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { return vqrshrn_high_n_u64(a, b, 19); } -// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -6909,7 +6254,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> @@ -6919,7 +6264,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 9); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> @@ -6929,7 +6274,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 19); } -// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -6937,7 +6282,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> @@ -6947,7 +6292,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 9); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast 
<8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> @@ -6957,7 +6302,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 19); } -// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], @@ -6966,7 +6311,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -6977,7 +6322,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 9); } -// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -6988,7 +6333,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 19); } -// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], @@ -6997,7 +6342,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -7008,7 +6353,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 9); } -// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -7019,57 +6364,53 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 19); } -// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_s8( // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I]] int16x8_t test_vmovl_s8(int8x8_t a) { return vmovl_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I]] int32x4_t test_vmovl_s16(int16x4_t a) { return vmovl_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: 
@test_vmovl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I]] int64x2_t test_vmovl_s32(int32x2_t a) { return vmovl_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_u8( // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I]] uint16x8_t test_vmovl_u8(uint8x8_t a) { return vmovl_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I]] uint32x4_t test_vmovl_u16(uint16x4_t a) { return vmovl_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I]] uint64x2_t test_vmovl_u32(uint32x2_t a) { return vmovl_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] @@ -7077,27 +6418,25 @@ int16x8_t test_vmovl_high_s8(int8x16_t a) { return vmovl_high_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> +// CHECK: ret <4 x i32> [[TMP1]] int32x4_t test_vmovl_high_s16(int16x8_t a) { return vmovl_high_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> +// CHECK: ret <2 x i64> [[TMP1]] int64x2_t test_vmovl_high_s32(int32x4_t a) { return vmovl_high_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] @@ -7105,27 +6444,25 @@ uint16x8_t test_vmovl_high_u8(uint8x16_t a) { return vmovl_high_u8(a); } -// CHECK-LABEL: define <4 x 
i32> @test_vmovl_high_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> +// CHECK: ret <4 x i32> [[TMP1]] uint32x4_t test_vmovl_high_u16(uint16x8_t a) { return vmovl_high_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovl_high_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> +// CHECK: ret <2 x i64> [[TMP1]] uint64x2_t test_vmovl_high_u32(uint32x4_t a) { return vmovl_high_u32(a); } -// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) @@ -7134,7 +6471,7 @@ float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { return vcvt_n_f32_s32(a, 31); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) @@ -7143,7 +6480,7 @@ float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { return vcvtq_n_f32_s32(a, 31); } -// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f64_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) @@ -7152,7 +6489,7 @@ float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { return vcvtq_n_f64_s64(a, 50); } -// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) @@ -7161,7 +6498,7 @@ float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { return vcvt_n_f32_u32(a, 31); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) @@ -7170,7 +6507,7 @@ float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { return vcvtq_n_f32_u32(a, 31); } -// CHECK-LABEL: define <2 x double> 
@test_vcvtq_n_f64_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f64_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) @@ -7179,7 +6516,7 @@ float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { return vcvtq_n_f64_u64(a, 50); } -// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) @@ -7188,7 +6525,7 @@ int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { return vcvt_n_s32_f32(a, 31); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) @@ -7197,7 +6534,7 @@ int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { return vcvtq_n_s32_f32(a, 31); } -// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) @@ -7206,7 +6543,7 @@ int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { return vcvtq_n_s64_f64(a, 50); } -// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) @@ -7215,7 +6552,7 @@ uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { return vcvt_n_u32_f32(a, 31); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) @@ -7224,7 +6561,7 @@ uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { return vcvtq_n_u32_f32(a, 31); } -// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) @@ -7233,7 +6570,7 @@ uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { return vcvtq_n_u64_f64(a, 50); } -// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -7242,33 +6579,29 
@@ int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { return vaddl_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { return vaddl_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { return vaddl_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -7277,33 +6610,29 @@ uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { return vaddl_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { return vaddl_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = 
zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { return vaddl_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_s8( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> @@ -7314,37 +6643,33 @@ int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { return vaddl_high_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_s16( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> +// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { return vaddl_high_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_s32( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { return vaddl_high_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_u8( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = 
shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> @@ -7355,37 +6680,33 @@ uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { return vaddl_high_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_u16( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> +// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { return vaddl_high_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_high_u32( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { return vaddl_high_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -7393,27 +6714,25 @@ int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { return vaddw_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { return vaddw_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_s32( // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { return vaddw_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -7421,27 +6740,25 @@ uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { return vaddw_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { return vaddw_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { return vaddw_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_s8( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] @@ -7450,29 +6767,27 @@ int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { return vaddw_high_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_s16( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] +// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { return vaddw_high_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_s32( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]] +// CHECK: [[TMP1:%.*]] = sext <2 x i32> 
[[SHUFFLE_I_I_I]] to <2 x i64> +// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { return vaddw_high_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_u8( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] @@ -7481,29 +6796,27 @@ uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { return vaddw_high_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_u16( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] +// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { return vaddw_high_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_high_u32( // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]] +// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { return vaddw_high_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubl_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -7512,33 +6825,29 @@ int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { return vsubl_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { return vsubl_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: 
[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
   return vsubl_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubl_u8(
 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
@@ -7547,33 +6856,29 @@ uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
   return vsubl_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubl_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
   return vsubl_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubl_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
   return vsubl_u32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_s8(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32>
 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32>
@@ -7584,37 +6889,33 @@ int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
   return vsubl_high_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_s16(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
   return vsubl_high_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_s32(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
   return vsubl_high_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_u8(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32>
 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32>
@@ -7625,37 +6926,33 @@ uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
   return vsubl_high_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_u16(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
   return vsubl_high_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubl_high_u32(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
   return vsubl_high_u32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubw_s8(
 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
 // CHECK: ret <8 x i16> [[SUB_I]]
@@ -7663,27 +6960,25 @@ int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
   return vsubw_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubw_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
   return vsubw_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubw_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
   return vsubw_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubw_u8(
 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
 // CHECK: ret <8 x i16> [[SUB_I]]
@@ -7691,27 +6986,25 @@ uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
   return vsubw_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubw_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
   return vsubw_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubw_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
   return vsubw_u32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_s8(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32>
 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
@@ -7720,29 +7013,27 @@ int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
   return vsubw_high_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_s16(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
+// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
   return vsubw_high_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_s32(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
+// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
   return vsubw_high_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_u8(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32>
 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
@@ -7751,34 +7042,30 @@ uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
   return vsubw_high_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_u16(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
+// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
 // CHECK: ret <4 x i32> [[SUB_I]]
 uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
   return vsubw_high_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubw_high_u32(
 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
+// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
 // CHECK: ret <2 x i64> [[SUB_I]]
 uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
   return vsubw_high_u32(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[VADDHN2_I]]
@@ -7786,12 +7073,10 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
   return vaddhn_s16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[VADDHN2_I]]
@@ -7799,12 +7084,10 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
   return vaddhn_s32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[VADDHN2_I]]
@@ -7812,12 +7095,10 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
   return vaddhn_s64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[VADDHN2_I]]
@@ -7825,12 +7106,10 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
   return vaddhn_u16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[VADDHN2_I]]
@@ -7838,12 +7117,10 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
   return vaddhn_u32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]],
 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[VADDHN2_I]]
@@ -7851,12 +7128,10 @@ uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
   return vaddhn_u64(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32>
@@ -7865,12 +7140,10 @@ int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
   return vaddhn_high_s16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32>
@@ -7879,12 +7152,10 @@ int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
   return vaddhn_high_s32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32>
@@ -7893,12 +7164,10 @@ int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
   return vaddhn_high_s64(r, a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32>
@@ -7907,12 +7176,10 @@ uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
   return vaddhn_high_u16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32>
@@ -7921,12 +7188,10 @@ uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
   return vaddhn_high_u32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vaddhn_high_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]],
 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32>
@@ -7935,166 +7200,132 @@ uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
   return vaddhn_high_u64(r, a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
 int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
   return vraddhn_s16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
 int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
   return vraddhn_s32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
 int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
   return vraddhn_s64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
 uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
   return vraddhn_u16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
 uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
   return vraddhn_u32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
 uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
   return vraddhn_u64(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
   return vraddhn_high_s16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
   return vraddhn_high_s32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
   return vraddhn_high_s64(r, a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
 uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
   return vraddhn_high_u16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
 uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
   return vraddhn_high_u32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vraddhn_high_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
 uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
   return vraddhn_high_u64(r, a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
@@ -8102,12 +7333,10 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
   return vsubhn_s16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
@@ -8115,12 +7344,10 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
   return vsubhn_s32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
@@ -8128,12 +7355,10 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
   return vsubhn_s64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
@@ -8141,12 +7366,10 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
   return vsubhn_u16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
@@ -8154,12 +7377,10 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
   return vsubhn_u32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]],
 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
@@ -8167,12 +7388,10 @@ uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
   return vsubhn_u64(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32>
@@ -8181,12 +7400,10 @@ int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
   return vsubhn_high_s16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32>
@@ -8195,12 +7412,10 @@ int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
   return vsubhn_high_s32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32>
@@ -8209,12 +7424,10 @@ int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
   return vsubhn_high_s64(r, a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32>
@@ -8223,12 +7436,10 @@ uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
   return vsubhn_high_u16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32>
@@ -8237,12 +7448,10 @@ uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
   return vsubhn_high_u32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsubhn_high_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]],
 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32>
@@ -8251,228 +7460,189 @@ uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
   return vsubhn_high_u64(r, a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
 int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
   return vrsubhn_s16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
 int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
   return vrsubhn_s32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
 int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
   return vrsubhn_s64(a, b);
 }
-// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
 uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
   return vrsubhn_u16(a, b);
 }
-// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
 uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
   return vrsubhn_u32(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
 uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
   return vrsubhn_u64(a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
 int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
   return vrsubhn_high_s16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
 int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
   return vrsubhn_high_s32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
 int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
   return vrsubhn_high_s64(r, a, b);
 }
-// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
 uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
   return vrsubhn_high_u16(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4
 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
 uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
   return vrsubhn_high_u32(r, a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vrsubhn_high_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4
 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
 uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
   return vrsubhn_high_u64(r, a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vabdl_s8(
 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
   return vabdl_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
   return vabdl_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
   return vabdl_s32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_u8(
 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
   return vabdl_u8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
   return vabdl_u16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
 uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
   return vabdl_u32(a, b);
 }
-// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK-LABEL: @test_vabal_s8(
 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
@@ -8480,35 +7650,32 @@ uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
   return vabal_s8(a, b, c);
 }
-// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+
+// CHECK-LABEL: @test_vabal_s16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) #4
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
 // CHECK: ret <4 x i32> [[ADD_I]]
 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
   return vabal_s16(a, b, c);
 }
-// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+
+// CHECK-LABEL: @test_vabal_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) #4
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
 // CHECK: ret <2 x i64> [[ADD_I]]
 int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
   return vabal_s32(a, b, c);
 }
-// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+
+// CHECK-LABEL: @test_vabal_u8(
 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
@@ -8516,36 +7683,32 @@ int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
 uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
   return vabal_u8(a, b, c);
 }
-// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+
+// CHECK-LABEL: @test_vabal_u16(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) #4
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
 // CHECK: ret <4 x i32> [[ADD_I]]
 uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
   return vabal_u16(a, b, c);
 }
-// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+
+// CHECK-LABEL: @test_vabal_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) #4
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
 // CHECK: ret <2 x i64> [[ADD_I]]
 uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
   return vabal_u32(a, b, c);
 }
-// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vabdl_high_s8(
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32>
 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32>
 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
@@ -8554,37 +7717,34 @@ uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
 int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
   return vabdl_high_s8(a, b);
 }
-// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_high_s16(
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32>
 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32>
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
 int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
   return vabdl_high_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vabdl_high_s32(
 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32>
 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x
i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I_I]] int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { return vabdl_high_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vabdl_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8593,38 +7753,34 @@ int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { return vabdl_high_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vabdl_high_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I_I]] uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { return vabdl_high_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vabdl_high_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> 
[[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I_I]] uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { return vabdl_high_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vabal_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8634,39 +7790,36 @@ uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { return vabal_high_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vabal_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vabal_high_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vabal_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> 
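// NOTE: The deletions in these hunks all follow one pattern: a value was
// bitcast to <8 x i8> and immediately bitcast back to its original type
// before being widened, and InstSimplify now folds that no-op round trip, so
// the zext is checked directly against the intrinsic result. A rough sketch
// of the fold (illustrative value names, not the test's FileCheck variables):
//   %t0 = bitcast <2 x i32> %d to <8 x i8>
//   %t1 = bitcast <8 x i8> %t0 to <2 x i32>   ; simplifies back to %d
//   %w  = zext <2 x i32> %t1 to <2 x i64>     ; becomes: zext <2 x i32> %d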
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vabal_high_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { + +// CHECK-LABEL: @test_vabal_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8676,93 +7829,86 @@ int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { return vabal_high_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vabal_high_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { return vabal_high_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vabal_high_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: 
[[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { return vabal_high_u32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_s8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { return vmull_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { + +// CHECK-LABEL: @test_vmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { return vmull_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { + +// CHECK-LABEL: @test_vmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { return vmull_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 { + +// CHECK-LABEL: @test_vmull_u8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { return vmull_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 { + +// CHECK-LABEL: @test_vmull_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { return vmull_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 { + +// CHECK-LABEL: @test_vmull_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 
x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { return vmull_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8770,31 +7916,30 @@ uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { return vmull_high_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vmull_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: ret <4 x i32> [[VMULL2_I_I]] int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { return vmull_high_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vmull_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: ret <2 x i64> [[VMULL2_I_I]] int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { return vmull_high_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vmull_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8802,91 +7947,86 @@ int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { return vmull_high_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vmull_high_u16( // 
CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: ret <4 x i32> [[VMULL2_I_I]] uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { return vmull_high_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vmull_high_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: ret <2 x i64> [[VMULL2_I_I]] uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { return vmull_high_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlal_s8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return vmlal_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlal_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> 
[[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlal_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { + +// CHECK-LABEL: @test_vmlal_u8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return vmlal_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlal_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlal_u32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlal_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8895,33 +8035,32 @@ uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { return vmlal_high_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlal_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> 
[[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vmlal_high_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlal_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vmlal_high_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { + +// CHECK-LABEL: @test_vmlal_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -8930,93 +8069,88 @@ int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { return vmlal_high_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlal_high_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { return vmlal_high_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlal_high_u32( // CHECK: 
[[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { return vmlal_high_u32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsl_s8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return vmlsl_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlsl_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlsl_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_u8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return vmlsl_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_u16( // CHECK: 
[[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlsl_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlsl_u32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsl_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -9025,33 +8159,32 @@ uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { return vmlsl_high_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vmlsl_high_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vmlsl_high_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -9060,215 +8193,179 @@ int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { return vmlsl_high_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_high_u16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { return vmlsl_high_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { + +// CHECK-LABEL: @test_vmlsl_high_u32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { return 
vmlsl_high_u32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { return vqdmull_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { + +// CHECK-LABEL: @test_vqdmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { return vqdmull_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// 
CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmull_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] 
to <4 x i16> -// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4 +// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]] int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { return vqdmull_high_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vqdmull_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4 +// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]] int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { return vqdmull_high_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4 -// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]] int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vqdmlal_high_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x 
i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4 -// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]] int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vqdmlal_high_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_s16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4 -// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]] int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vqdmlsl_high_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_high_s32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4 -// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 
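// NOTE: The saturating multiply-accumulate tests drop one more round trip:
// the accumulator %a is no longer bitcast to <16 x i8> and back before the
// saturating add/subtract. A minimal sketch of the simplification
// (illustrative names, not the test's own):
//   %t   = bitcast <2 x i64> %a to <16 x i8>
//   %acc = bitcast <16 x i8> %t to <2 x i64>   ; simplifies back to %a
//   %r   = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %acc, <2 x i64> %m)
// which is why the sqadd/sqsub calls now take %a as their first operand.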
-// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]] int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vqdmlsl_high_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_p8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { return vmull_p8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_high_p8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 @@ -9277,35 +8374,35 @@ poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) { return vmull_high_p8(a, b); } -// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vaddd_s64( // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b // CHECK: ret i64 [[VADDD_I]] int64_t test_vaddd_s64(int64_t a, int64_t b) { return vaddd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vaddd_u64( // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b // CHECK: ret i64 [[VADDD_I]] uint64_t test_vaddd_u64(uint64_t a, uint64_t b) { return vaddd_u64(a, b); } -// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsubd_s64( // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b // CHECK: ret i64 [[VSUBD_I]] int64_t test_vsubd_s64(int64_t a, int64_t b) { return vsubd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsubd_u64( // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b // CHECK: ret i64 [[VSUBD_I]] uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { return vsubd_u64(a, b); } -// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 { +// CHECK-LABEL: @test_vqaddb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 // CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 @@ -9315,7 +8412,7 @@ int8_t test_vqaddb_s8(int8_t a, int8_t b) { return vqaddb_s8(a, b); } -// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 { +// CHECK-LABEL: @test_vqaddh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -9325,21 +8422,21 @@ int16_t test_vqaddh_s16(int16_t a, int16_t b) { return vqaddh_s16(a, b); } -// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqadds_s32( // CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4 // CHECK: ret i32 [[VQADDS_S32_I]] int32_t 
   return vqadds_s32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqaddd_s64(
 // CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQADDD_S64_I]]
 int64_t test_vqaddd_s64(int64_t a, int64_t b) {
   return vqaddd_s64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqaddb_u8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9349,7 +8446,7 @@ uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
   return vqaddb_u8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqaddh_u16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9359,21 +8456,21 @@ uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
   return vqaddh_u16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqadds_u32(
 // CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQADDS_U32_I]]
 uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
   return vqadds_u32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqaddd_u64(
 // CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQADDD_U64_I]]
 uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
   return vqaddd_u64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqsubb_s8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9383,7 +8480,7 @@ int8_t test_vqsubb_s8(int8_t a, int8_t b) {
   return vqsubb_s8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqsubh_s16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9393,21 +8490,21 @@ int16_t test_vqsubh_s16(int16_t a, int16_t b) {
   return vqsubh_s16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqsubs_s32(
 // CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQSUBS_S32_I]]
 int32_t test_vqsubs_s32(int32_t a, int32_t b) {
   return vqsubs_s32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqsubd_s64(
 // CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQSUBD_S64_I]]
 int64_t test_vqsubd_s64(int64_t a, int64_t b) {
   return vqsubd_s64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqsubb_u8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9417,7 +8514,7 @@ uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
   return vqsubb_u8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqsubh_u16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9427,35 +8524,35 @@ uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
   return vqsubh_u16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqsubs_u32(
 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQSUBS_U32_I]]
 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
   return vqsubs_u32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqsubd_u64(
 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQSUBD_U64_I]]
 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
   return vqsubd_u64(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vshld_s64(
 // CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VSHLD_S64_I]]
 int64_t test_vshld_s64(int64_t a, int64_t b) {
   return vshld_s64(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vshld_u64(
 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VSHLD_U64_I]]
 uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
   return vshld_u64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqshlb_s8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9465,7 +8562,7 @@ int8_t test_vqshlb_s8(int8_t a, int8_t b) {
   return vqshlb_s8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqshlh_s16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9475,21 +8572,21 @@ int16_t test_vqshlh_s16(int16_t a, int16_t b) {
   return vqshlh_s16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqshls_s32(
 // CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQSHLS_S32_I]]
 int32_t test_vqshls_s32(int32_t a, int32_t b) {
   return vqshls_s32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqshld_s64(
 // CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQSHLD_S64_I]]
 int64_t test_vqshld_s64(int64_t a, int64_t b) {
   return vqshld_s64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqshlb_u8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9499,7 +8596,7 @@ uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
   return vqshlb_u8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqshlh_u16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9509,36 +8606,35 @@ uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
   return vqshlh_u16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqshls_u32(
 // CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQSHLS_U32_I]]
 uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
   return vqshls_u32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqshld_u64(
 // CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQSHLD_U64_I]]
 uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
   return vqshld_u64(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vrshld_s64(
 // CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VRSHLD_S64_I]]
 int64_t test_vrshld_s64(int64_t a, int64_t b) {
   return vrshld_s64(a, b);
 }
-
-// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vrshld_u64(
 // CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VRSHLD_U64_I]]
 uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
   return vrshld_u64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqrshlb_s8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9548,7 +8644,7 @@ int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
   return vqrshlb_s8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqrshlh_s16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9558,21 +8654,21 @@ int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
   return vqrshlh_s16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqrshls_s32(
 // CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQRSHLS_S32_I]]
 int32_t test_vqrshls_s32(int32_t a, int32_t b) {
   return vqrshls_s32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqrshld_s64(
 // CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQRSHLD_S64_I]]
 int64_t test_vqrshld_s64(int64_t a, int64_t b) {
   return vqrshld_s64(a, b);
 }
 
-// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
+// CHECK-LABEL: @test_vqrshlb_u8(
 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
 // CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
@@ -9582,7 +8678,7 @@ uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
   return vqrshlb_u8(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqrshlh_u16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9592,124 +8688,113 @@ uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
   return vqrshlh_u16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqrshls_u32(
 // CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQRSHLS_U32_I]]
 uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
   return vqrshls_u32(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
+// CHECK-LABEL: @test_vqrshld_u64(
 // CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
 // CHECK: ret i64 [[VQRSHLD_U64_I]]
 uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
   return vqrshld_u64(a, b);
 }
 
-// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vpaddd_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
+// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4
 // CHECK: ret i64 [[VPADDD_S64_I]]
 int64_t test_vpaddd_s64(int64x2_t a) {
   return vpaddd_s64(a);
 }
 
-// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vpadds_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
-// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
+// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
+// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
 // CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
 // CHECK: ret float [[VPADDD_I]]
 float32_t test_vpadds_f32(float32x2_t a) {
   return vpadds_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vpaddd_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
-// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
+// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
+// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
 // CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
 // CHECK: ret double [[VPADDD_I]]
 float64_t test_vpaddd_f64(float64x2_t a) {
   return vpaddd_f64(a);
 }
 
-// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vpmaxnms_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) #4
 // CHECK: ret float [[VPMAXNMS_F32_I]]
 float32_t test_vpmaxnms_f32(float32x2_t a) {
   return vpmaxnms_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vpmaxnmqd_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) #4
 // CHECK: ret double [[VPMAXNMQD_F64_I]]
 float64_t test_vpmaxnmqd_f64(float64x2_t a) {
   return vpmaxnmqd_f64(a);
 }
 
-// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vpmaxs_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) #4
 // CHECK: ret float [[VPMAXS_F32_I]]
 float32_t test_vpmaxs_f32(float32x2_t a) {
   return vpmaxs_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vpmaxqd_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) #4
 // CHECK: ret double [[VPMAXQD_F64_I]]
 float64_t test_vpmaxqd_f64(float64x2_t a) {
   return vpmaxqd_f64(a);
 }
 
-// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vpminnms_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) #4
 // CHECK: ret float [[VPMINNMS_F32_I]]
 float32_t test_vpminnms_f32(float32x2_t a) {
   return vpminnms_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vpminnmqd_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) #4
 // CHECK: ret double [[VPMINNMQD_F64_I]]
 float64_t test_vpminnmqd_f64(float64x2_t a) {
   return vpminnmqd_f64(a);
 }
 
-// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vpmins_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) #4
 // CHECK: ret float [[VPMINS_F32_I]]
 float32_t test_vpmins_f32(float32x2_t a) {
   return vpmins_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vpminqd_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) #4
 // CHECK: ret double [[VPMINQD_F64_I]]
 float64_t test_vpminqd_f64(float64x2_t a) {
   return vpminqd_f64(a);
 }
 
-// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqdmulhh_s16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9719,14 +8804,14 @@ int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
   return vqdmulhh_s16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqdmulhs_s32(
 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQDMULHS_S32_I]]
 int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
   return vqdmulhs_s32(a, b);
 }
 
-// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhh_s16(
 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
@@ -9736,155 +8821,151 @@ int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
   return vqrdmulhh_s16(a, b);
 }
 
-// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
+// CHECK-LABEL: @test_vqrdmulhs_s32(
 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
 // CHECK: ret i32 [[VQRDMULHS_S32_I]]
 int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
   return vqrdmulhs_s32(a, b);
 }
 
-// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
+// CHECK-LABEL: @test_vmulxs_f32(
 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
 // CHECK: ret float [[VMULXS_F32_I]]
 float32_t test_vmulxs_f32(float32_t a, float32_t b) {
   return vmulxs_f32(a, b);
 }
 
-// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
+// CHECK-LABEL: @test_vmulxd_f64(
 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
 // CHECK: ret double [[VMULXD_F64_I]]
 float64_t test_vmulxd_f64(float64_t a, float64_t b) {
   return vmulxd_f64(a, b);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vmulx_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
+// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b) #4
 // CHECK: ret <1 x double> [[VMULX2_I]]
 float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
   return vmulx_f64(a, b);
 }
 
-// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
+// CHECK-LABEL: @test_vrecpss_f32(
 // CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
 // CHECK: ret float [[VRECPS_I]]
 float32_t test_vrecpss_f32(float32_t a, float32_t b) {
   return vrecpss_f32(a, b);
 }
 
-// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
+// CHECK-LABEL: @test_vrecpsd_f64(
 // CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
 // CHECK: ret double [[VRECPS_I]]
 float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
   return vrecpsd_f64(a, b);
 }
 
-// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
+// CHECK-LABEL: @test_vrsqrtss_f32(
 // CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
 // CHECK: ret float [[VRSQRTSS_F32_I]]
 float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
   return vrsqrtss_f32(a, b);
 }
 
-// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
+// CHECK-LABEL: @test_vrsqrtsd_f64(
 // CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
 // CHECK: ret double [[VRSQRTSD_F64_I]]
 float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
   return vrsqrtsd_f64(a, b);
 }
 
-// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
+// CHECK-LABEL: @test_vcvts_f32_s32(
 // CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
 // CHECK: ret float [[TMP0]]
 float32_t test_vcvts_f32_s32(int32_t a) {
   return vcvts_f32_s32(a);
 }
 
-// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
+// CHECK-LABEL: @test_vcvtd_f64_s64(
 // CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
 // CHECK: ret double [[TMP0]]
 float64_t test_vcvtd_f64_s64(int64_t a) {
   return vcvtd_f64_s64(a);
 }
 
-// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
+// CHECK-LABEL: @test_vcvts_f32_u32(
 // CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
 // CHECK: ret float [[TMP0]]
 float32_t test_vcvts_f32_u32(uint32_t a) {
   return vcvts_f32_u32(a);
 }
 
-// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
+// CHECK-LABEL: @test_vcvtd_f64_u64(
 // CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
 // CHECK: ret double [[TMP0]]
 float64_t test_vcvtd_f64_u64(uint64_t a) {
   return vcvtd_f64_u64(a);
 }
 
-// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
+// CHECK-LABEL: @test_vrecpes_f32(
 // CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
 // CHECK: ret float [[VRECPES_F32_I]]
 float32_t test_vrecpes_f32(float32_t a) {
   return vrecpes_f32(a);
 }
-
-// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
+
+// CHECK-LABEL: @test_vrecped_f64(
 // CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
 // CHECK: ret double [[VRECPED_F64_I]]
 float64_t test_vrecped_f64(float64_t a) {
   return vrecped_f64(a);
 }
-
-// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
+
+// CHECK-LABEL: @test_vrecpxs_f32(
 // CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
 // CHECK: ret float [[VRECPXS_F32_I]]
 float32_t test_vrecpxs_f32(float32_t a) {
   return vrecpxs_f32(a);
- }
-
-// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
+}
+
+// CHECK-LABEL: @test_vrecpxd_f64(
 // CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
 // CHECK: ret double [[VRECPXD_F64_I]]
 float64_t test_vrecpxd_f64(float64_t a) {
   return vrecpxd_f64(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrsqrte_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
+// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a) #4
 // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
 uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
   return vrsqrte_u32(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrsqrteq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
+// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a) #4
 // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
   return vrsqrteq_u32(a);
 }
 
-// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
+// CHECK-LABEL: @test_vrsqrtes_f32(
 // CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
 // CHECK: ret float [[VRSQRTES_F32_I]]
 float32_t test_vrsqrtes_f32(float32_t a) {
   return vrsqrtes_f32(a);
 }
 
-// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
+// CHECK-LABEL: @test_vrsqrted_f64(
 // CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
 // CHECK: ret double [[VRSQRTED_F64_I]]
 float64_t test_vrsqrted_f64(float64_t a) {
   return vrsqrted_f64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
 // CHECK: ret <16 x i8> [[TMP1]]
@@ -9892,7 +8973,7 @@ uint8x16_t test_vld1q_u8(uint8_t const *a) {
   return vld1q_u8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
@@ -9901,7 +8982,7 @@ uint16x8_t test_vld1q_u16(uint16_t const *a) {
   return vld1q_u16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u32(
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
@@ -9910,7 +8991,7 @@ uint32x4_t test_vld1q_u32(uint32_t const *a) {
   return vld1q_u32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u64(
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@@ -9919,7 +9000,7 @@ uint64x2_t test_vld1q_u64(uint64_t const *a) {
   return vld1q_u64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
 // CHECK: ret <16 x i8> [[TMP1]]
@@ -9927,7 +9008,7 @@ int8x16_t test_vld1q_s8(int8_t const *a) {
   return vld1q_s8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
@@ -9936,7 +9017,7 @@ int16x8_t test_vld1q_s16(int16_t const *a) {
   return vld1q_s16(a);
 }
 
-// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s32(
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
 // CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
@@ -9945,7 +9026,7 @@ int32x4_t test_vld1q_s32(int32_t const *a) {
   return vld1q_s32(a);
 }
 
-// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s64(
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@@ -9954,7 +9035,7 @@ int64x2_t test_vld1q_s64(int64_t const *a) {
   return vld1q_s64(a);
 }
 
-// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f16(
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
@@ -9964,7 +9045,7 @@ float16x8_t test_vld1q_f16(float16_t const *a) {
   return vld1q_f16(a);
 }
 
-// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f32(
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
 // CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
@@ -9973,7 +9054,7 @@ float32x4_t test_vld1q_f32(float32_t const *a) {
   return vld1q_f32(a);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f64(
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
 // CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
@@ -9982,7 +9063,7 @@ float64x2_t test_vld1q_f64(float64_t const *a) {
   return vld1q_f64(a);
 }
 
-// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_p8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
 // CHECK: ret <16 x i8> [[TMP1]]
@@ -9990,7 +9071,7 @@ poly8x16_t test_vld1q_p8(poly8_t const *a) {
   return vld1q_p8(a);
 }
 
-// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_p16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
 // CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
@@ -9999,7 +9080,7 @@ poly16x8_t test_vld1q_p16(poly16_t const *a) {
   return vld1q_p16(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_u8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
 // CHECK: ret <8 x i8> [[TMP1]]
@@ -10007,7 +9088,7 @@ uint8x8_t test_vld1_u8(uint8_t const *a) {
   return vld1_u8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_u16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
@@ -10016,7 +9097,7 @@ uint16x4_t test_vld1_u16(uint16_t const *a) {
   return vld1_u16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1_u32(
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
@@ -10025,7 +9106,7 @@ uint32x2_t test_vld1_u32(uint32_t const *a) {
   return vld1_u32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1_u64(
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
@@ -10034,7 +9115,7 @@ uint64x1_t test_vld1_u64(uint64_t const *a) {
   return vld1_u64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_s8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
 // CHECK: ret <8 x i8> [[TMP1]]
@@ -10042,7 +9123,7 @@ int8x8_t test_vld1_s8(int8_t const *a) {
   return vld1_s8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_s16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
@@ -10051,7 +9132,7 @@ int16x4_t test_vld1_s16(int16_t const *a) {
   return vld1_s16(a);
 }
 
-// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1_s32(
 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
 // CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
@@ -10060,7 +9141,7 @@ int32x2_t test_vld1_s32(int32_t const *a) {
   return vld1_s32(a);
 }
 
-// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1_s64(
 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
@@ -10069,7 +9150,7 @@ int64x1_t test_vld1_s64(int64_t const *a) {
   return vld1_s64(a);
 }
 
-// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld1_f16(
 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
@@ -10079,7 +9160,7 @@ float16x4_t test_vld1_f16(float16_t const *a) {
   return vld1_f16(a);
 }
 
-// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld1_f32(
 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
 // CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
@@ -10088,7 +9169,7 @@ float32x2_t test_vld1_f32(float32_t const *a) {
   return vld1_f32(a);
 }
 
-// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld1_f64(
 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
 // CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
@@ -10097,7 +9178,7 @@ float64x1_t test_vld1_f64(float64_t const *a) {
   return vld1_f64(a);
 }
 
-// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_p8(
 // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
 // CHECK: ret <8 x i8> [[TMP1]]
@@ -10105,7 +9186,7 @@ poly8x8_t test_vld1_p8(poly8_t const *a) {
   return vld1_p8(a);
 }
 
-// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_p16(
 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 // CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
@@ -10114,7 +9195,7 @@ poly16x4_t test_vld1_p16(poly16_t const *a) {
   return vld1_p16(a);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2q_u8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
@@ -10131,7 +9212,7 @@ uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
   return vld2q_u8(a);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2q_u16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
@@ -10149,7 +9230,7 @@ uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
   return vld2q_u16(a);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld2q_u32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
@@ -10167,7 +9248,7 @@ uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
   return vld2q_u32(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld2q_u64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
@@ -10185,7 +9266,7 @@ uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
   return vld2q_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2q_s8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
@@ -10202,7 +9283,7 @@ int8x16x2_t test_vld2q_s8(int8_t const *a) {
   return vld2q_s8(a);
 }
 
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2q_s16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
@@ -10220,7 +9301,7 @@ int16x8x2_t test_vld2q_s16(int16_t const *a) {
   return vld2q_s16(a);
 }
 
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld2q_s32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
@@ -10238,7 +9319,7 @@ int32x4x2_t test_vld2q_s32(int32_t const *a) {
   return vld2q_s32(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld2q_s64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
@@ -10256,7 +9337,7 @@ int64x2x2_t test_vld2q_s64(int64_t const *a) {
   return vld2q_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld2q_f16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
@@ -10274,7 +9355,7 @@ float16x8x2_t test_vld2q_f16(float16_t const *a) {
   return vld2q_f16(a);
 }
 
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld2q_f32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
@@ -10292,7 +9373,7 @@ float32x4x2_t test_vld2q_f32(float32_t const *a) {
   return vld2q_f32(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld2q_f64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
@@ -10310,7 +9391,7 @@ float64x2x2_t test_vld2q_f64(float64_t const *a) {
   return vld2q_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2q_p8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
@@ -10327,7 +9408,7 @@ poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
   return vld2q_p8(a);
 }
 
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2q_p16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
@@ -10345,7 +9426,7 @@ poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
   return vld2q_p16(a);
 }
 
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2_u8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
@@ -10362,7 +9443,7 @@ uint8x8x2_t test_vld2_u8(uint8_t const *a) {
   return vld2_u8(a);
 }
 
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2_u16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
@@ -10380,7 +9461,7 @@ uint16x4x2_t test_vld2_u16(uint16_t const *a) {
   return vld2_u16(a);
 }
 
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld2_u32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
@@ -10398,7 +9479,7 @@ uint32x2x2_t test_vld2_u32(uint32_t const *a) {
   return vld2_u32(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld2_u64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
@@ -10416,7 +9497,7 @@ uint64x1x2_t test_vld2_u64(uint64_t const *a) {
   return vld2_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2_s8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
@@ -10433,7 +9514,7 @@ int8x8x2_t test_vld2_s8(int8_t const *a) {
   return vld2_s8(a);
 }
 
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2_s16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
@@ -10451,7 +9532,7 @@ int16x4x2_t test_vld2_s16(int16_t const *a) {
   return vld2_s16(a);
 }
 
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld2_s32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
@@ -10469,7 +9550,7 @@ int32x2x2_t test_vld2_s32(int32_t const *a) {
   return vld2_s32(a);
 }
 
-// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld2_s64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
@@ -10487,7 +9568,7 @@ int64x1x2_t test_vld2_s64(int64_t const *a) {
   return vld2_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld2_f16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
@@ -10505,7 +9586,7 @@ float16x4x2_t test_vld2_f16(float16_t const *a) {
   return vld2_f16(a);
 }
 
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld2_f32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
@@ -10523,7 +9604,7 @@ float32x2x2_t test_vld2_f32(float32_t const *a) {
   return vld2_f32(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld2_f64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
@@ -10541,7 +9622,7 @@ float64x1x2_t test_vld2_f64(float64_t const *a) {
   return vld2_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld2_p8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
@@ -10558,7 +9639,7 @@ poly8x8x2_t test_vld2_p8(poly8_t const *a) {
   return vld2_p8(a);
 }
 
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld2_p16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
@@ -10576,7 +9657,7 @@ poly16x4x2_t test_vld2_p16(poly16_t const *a) {
   return vld2_p16(a);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3q_u8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
@@ -10593,7 +9674,7 @@ uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
   return vld3q_u8(a);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3q_u16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
@@ -10611,7 +9692,7 @@ uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
   return vld3q_u16(a);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld3q_u32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
@@ -10629,7 +9710,7 @@ uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
   return vld3q_u32(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld3q_u64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
@@ -10647,7 +9728,7 @@ uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
   return vld3q_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3q_s8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
@@ -10664,7 +9745,7 @@ int8x16x3_t test_vld3q_s8(int8_t const *a) {
   return vld3q_s8(a);
 }
 
-// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3q_s16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
@@ -10682,7 +9763,7 @@ int16x8x3_t test_vld3q_s16(int16_t const *a) {
   return vld3q_s16(a);
 }
 
-// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld3q_s32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
@@ -10700,7 +9781,7 @@ int32x4x3_t test_vld3q_s32(int32_t const *a) {
   return vld3q_s32(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld3q_s64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
@@ -10718,7 +9799,7 @@ int64x2x3_t test_vld3q_s64(int64_t const *a) {
   return vld3q_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld3q_f16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
@@ -10736,7 +9817,7 @@ float16x8x3_t test_vld3q_f16(float16_t const *a) {
   return vld3q_f16(a);
 }
 
-// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld3q_f32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
@@ -10754,7 +9835,7 @@ float32x4x3_t test_vld3q_f32(float32_t const *a) {
   return vld3q_f32(a);
 }
 
-// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld3q_f64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
@@ -10772,7 +9853,7 @@ float64x2x3_t test_vld3q_f64(float64_t const *a) {
   return vld3q_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3q_p8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
@@ -10789,7 +9870,7 @@ poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
   return vld3q_p8(a);
 }
 
-// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3q_p16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
@@ -10807,7 +9888,7 @@ poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
   return vld3q_p16(a);
 }
 
-// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3_u8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
@@ -10824,7 +9905,7 @@ uint8x8x3_t test_vld3_u8(uint8_t const *a) {
   return vld3_u8(a);
 }
 
-// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3_u16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
@@ -10842,7 +9923,7 @@ uint16x4x3_t test_vld3_u16(uint16_t const *a) {
   return vld3_u16(a);
 }
 
-// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld3_u32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
@@ -10860,7 +9941,7 @@ uint32x2x3_t test_vld3_u32(uint32_t const *a) {
   return vld3_u32(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld3_u64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
@@ -10878,7 +9959,7 @@ uint64x1x3_t test_vld3_u64(uint64_t const *a) {
   return vld3_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3_s8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
@@ -10895,7 +9976,7 @@ int8x8x3_t test_vld3_s8(int8_t const *a) {
   return vld3_s8(a);
 }
 
-// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3_s16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
@@ -10913,7 +9994,7 @@ int16x4x3_t test_vld3_s16(int16_t const *a) {
   return vld3_s16(a);
 }
 
-// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld3_s32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
@@ -10931,7 +10012,7 @@ int32x2x3_t test_vld3_s32(int32_t const *a) {
   return vld3_s32(a);
 }
 
-// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld3_s64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
@@ -10949,7 +10030,7 @@ int64x1x3_t test_vld3_s64(int64_t const *a) {
   return vld3_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld3_f16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
@@ -10967,7 +10048,7 @@ float16x4x3_t test_vld3_f16(float16_t const *a) {
   return vld3_f16(a);
 }
 
-// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
+// CHECK-LABEL: @test_vld3_f32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
@@ -10985,7 +10066,7 @@ float32x2x3_t test_vld3_f32(float32_t const *a) {
   return vld3_f32(a);
 }
 
-// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
+// CHECK-LABEL: @test_vld3_f64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
@@ -11003,7 +10084,7 @@ float64x1x3_t test_vld3_f64(float64_t const *a) {
   return vld3_f64(a);
 }
 
-// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld3_p8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
@@ -11020,7 +10101,7 @@ poly8x8x3_t test_vld3_p8(poly8_t const *a) {
   return vld3_p8(a);
 }
 
-// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld3_p16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
@@ -11038,7 +10119,7 @@ poly16x4x3_t test_vld3_p16(poly16_t const *a) {
   return vld3_p16(a);
 }
 
-// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld4q_u8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
@@ -11055,7 +10136,7 @@ uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
   return vld4q_u8(a);
 }
 
-// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld4q_u16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
@@ -11073,7 +10154,7 @@ uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
   return vld4q_u16(a);
 }
 
-// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld4q_u32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
@@ -11091,7 +10172,7 @@ uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
   return vld4q_u32(a);
 }
 
-// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld4q_u64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
@@ -11109,7 +10190,7 @@ uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
   return vld4q_u64(a);
 }
 
-// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
+// CHECK-LABEL: @test_vld4q_s8(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
@@ -11126,7 +10207,7 @@ int8x16x4_t test_vld4q_s8(int8_t const *a) {
   return vld4q_s8(a);
 }
 
-// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
+// CHECK-LABEL: @test_vld4q_s16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
@@ -11144,7 +10225,7 @@ int16x8x4_t test_vld4q_s16(int16_t const *a) {
   return vld4q_s16(a);
 }
 
-// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
+// CHECK-LABEL: @test_vld4q_s32(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
@@ -11162,7 +10243,7 @@ int32x4x4_t test_vld4q_s32(int32_t const *a) {
   return vld4q_s32(a);
 }
 
-// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 {
+// CHECK-LABEL: @test_vld4q_s64(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
@@ -11180,7 +10261,7 @@ int64x2x4_t test_vld4q_s64(int64_t const *a) {
   return vld4q_s64(a);
 }
 
-// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
+// CHECK-LABEL: @test_vld4q_f16(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
@@ -11198,7 +10279,7 @@ float16x8x4_t test_vld4q_f16(float16_t const *a) {
{ return vld4q_f16(a); } -// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld4q_f32( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* @@ -11216,7 +10297,7 @@ float32x4x4_t test_vld4q_f32(float32_t const *a) { return vld4q_f32(a); } -// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 { +// CHECK-LABEL: @test_vld4q_f64( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* @@ -11234,7 +10315,7 @@ float64x2x4_t test_vld4q_f64(float64_t const *a) { return vld4q_f64(a); } -// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld4q_p8( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* @@ -11251,7 +10332,7 @@ poly8x16x4_t test_vld4q_p8(poly8_t const *a) { return vld4q_p8(a); } -// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld4q_p16( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* @@ -11269,7 +10350,7 @@ poly16x8x4_t test_vld4q_p16(poly16_t const *a) { return vld4q_p16(a); } -// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 { +// CHECK-LABEL: @test_vld4_u8( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* @@ -11286,7 +10367,7 @@ uint8x8x4_t test_vld4_u8(uint8_t const *a) { return vld4_u8(a); } -// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 { +// CHECK-LABEL: @test_vld4_u16( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* @@ -11304,7 +10385,7 @@ uint16x4x4_t test_vld4_u16(uint16_t const *a) { return vld4_u16(a); } -// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 { +// CHECK-LABEL: @test_vld4_u32( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* @@ -11322,7 +10403,7 @@ uint32x2x4_t test_vld4_u32(uint32_t const *a) { return vld4_u32(a); } -// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 { +// CHECK-LABEL: @test_vld4_u64( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* @@ -11340,7 +10421,7 @@ uint64x1x4_t test_vld4_u64(uint64_t const *a) { return vld4_u64(a); } -// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 { +// CHECK-LABEL: @test_vld4_s8( // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* @@ 
-11357,7 +10438,7 @@ int8x8x4_t test_vld4_s8(int8_t const *a) { return vld4_s8(a); } -// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 { +// CHECK-LABEL: @test_vld4_s16( // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* @@ -11375,7 +10456,7 @@ int16x4x4_t test_vld4_s16(int16_t const *a) { return vld4_s16(a); } -// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 { +// CHECK-LABEL: @test_vld4_s32( // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* @@ -11393,7 +10474,7 @@ int32x2x4_t test_vld4_s32(int32_t const *a) { return vld4_s32(a); } -// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 { +// CHECK-LABEL: @test_vld4_s64( // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* @@ -11411,7 +10492,7 @@ int64x1x4_t test_vld4_s64(int64_t const *a) { return vld4_s64(a); } -// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 { +// CHECK-LABEL: @test_vld4_f16( // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* @@ -11429,7 +10510,7 @@ float16x4x4_t test_vld4_f16(float16_t const *a) { return vld4_f16(a); } -// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld4_f32( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* @@ -11447,7 +10528,7 @@ float32x2x4_t test_vld4_f32(float32_t const *a) { return vld4_f32(a); } -// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 { +// CHECK-LABEL: @test_vld4_f64( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* @@ -11465,7 +10546,7 @@ float64x1x4_t test_vld4_f64(float64_t const *a) { return vld4_f64(a); } -// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld4_p8( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* @@ -11482,7 +10563,7 @@ poly8x8x4_t test_vld4_p8(poly8_t const *a) { return vld4_p8(a); } -// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld4_p16( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* @@ -11500,7 +10581,7 @@ poly16x4x4_t test_vld4_p16(poly16_t const *a) { return vld4_p16(a); } -// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] // CHECK: ret void @@ -11508,7 +10589,7 @@ void 
test_vst1q_u8(uint8_t *a, uint8x16_t b) { vst1q_u8(a, b); } -// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -11519,7 +10600,7 @@ void test_vst1q_u16(uint16_t *a, uint16x8_t b) { vst1q_u16(a, b); } -// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -11530,7 +10611,7 @@ void test_vst1q_u32(uint32_t *a, uint32x4_t b) { vst1q_u32(a, b); } -// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* @@ -11541,7 +10622,7 @@ void test_vst1q_u64(uint64_t *a, uint64x2_t b) { vst1q_u64(a, b); } -// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] // CHECK: ret void @@ -11549,7 +10630,7 @@ void test_vst1q_s8(int8_t *a, int8x16_t b) { vst1q_s8(a, b); } -// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -11560,7 +10641,7 @@ void test_vst1q_s16(int16_t *a, int16x8_t b) { vst1q_s16(a, b); } -// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -11571,7 +10652,7 @@ void test_vst1q_s32(int32_t *a, int32x4_t b) { vst1q_s32(a, b); } -// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* @@ -11582,7 +10663,7 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) { vst1q_s64(a, b); } -// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 { +// CHECK-LABEL: @test_vst1q_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -11593,7 +10674,7 @@ void test_vst1q_f16(float16_t *a, float16x8_t b) { vst1q_f16(a, b); } -// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vst1q_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* @@ -11604,7 +10685,7 @@ void test_vst1q_f32(float32_t *a, float32x4_t b) { vst1q_f32(a, b); } -// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vst1q_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> // CHECK: 
[[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>* @@ -11615,7 +10696,7 @@ void test_vst1q_f64(float64_t *a, float64x2_t b) { vst1q_f64(a, b); } -// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_p8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* // CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] // CHECK: ret void @@ -11623,7 +10704,7 @@ void test_vst1q_p8(poly8_t *a, poly8x16_t b) { vst1q_p8(a, b); } -// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -11634,7 +10715,7 @@ void test_vst1q_p16(poly16_t *a, poly16x8_t b) { vst1q_p16(a, b); } -// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_u8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] // CHECK: ret void @@ -11642,7 +10723,7 @@ void test_vst1_u8(uint8_t *a, uint8x8_t b) { vst1_u8(a, b); } -// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -11653,7 +10734,7 @@ void test_vst1_u16(uint16_t *a, uint16x4_t b) { vst1_u16(a, b); } -// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -11664,7 +10745,7 @@ void test_vst1_u32(uint32_t *a, uint32x2_t b) { vst1_u32(a, b); } -// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* @@ -11675,7 +10756,7 @@ void test_vst1_u64(uint64_t *a, uint64x1_t b) { vst1_u64(a, b); } -// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_s8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] // CHECK: ret void @@ -11683,7 +10764,7 @@ void test_vst1_s8(int8_t *a, int8x8_t b) { vst1_s8(a, b); } -// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -11694,7 +10775,7 @@ void test_vst1_s16(int16_t *a, int16x4_t b) { vst1_s16(a, b); } -// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -11705,7 +10786,7 @@ void test_vst1_s32(int32_t *a, int32x2_t b) { vst1_s32(a, b); } -// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* @@ -11716,7 +10797,7 @@ void 
test_vst1_s64(int64_t *a, int64x1_t b) { vst1_s64(a, b); } -// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vst1_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -11727,7 +10808,7 @@ void test_vst1_f16(float16_t *a, float16x4_t b) { vst1_f16(a, b); } -// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vst1_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* @@ -11738,7 +10819,7 @@ void test_vst1_f32(float32_t *a, float32x2_t b) { vst1_f32(a, b); } -// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 { +// CHECK-LABEL: @test_vst1_f64( // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>* @@ -11749,7 +10830,7 @@ void test_vst1_f64(float64_t *a, float64x1_t b) { vst1_f64(a, b); } -// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_p8( // CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* // CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] // CHECK: ret void @@ -11757,7 +10838,7 @@ void test_vst1_p8(poly8_t *a, poly8x8_t b) { vst1_p8(a, b); } -// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -11768,7 +10849,7 @@ void test_vst1_p16(poly16_t *a, poly16x4_t b) { vst1_p16(a, b); } -// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -11788,7 +10869,7 @@ void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { vst2q_u8(a, b); } -// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -11813,7 +10894,7 @@ void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { vst2q_u16(a, b); } -// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -11838,7 +10919,7 @@ void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { vst2q_u32(a, b); } -// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* 
[[B]], i32 0, i32 0 @@ -11863,7 +10944,7 @@ void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { vst2q_u64(a, b); } -// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -11883,7 +10964,7 @@ void test_vst2q_s8(int8_t *a, int8x16x2_t b) { vst2q_s8(a, b); } -// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -11908,7 +10989,7 @@ void test_vst2q_s16(int16_t *a, int16x8x2_t b) { vst2q_s16(a, b); } -// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -11933,7 +11014,7 @@ void test_vst2q_s32(int32_t *a, int32x4x2_t b) { vst2q_s32(a, b); } -// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 @@ -11958,7 +11039,7 @@ void test_vst2q_s64(int64_t *a, int64x2x2_t b) { vst2q_s64(a, b); } -// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -11983,7 +11064,7 @@ void test_vst2q_f16(float16_t *a, float16x8x2_t b) { vst2q_f16(a, b); } -// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -12008,7 +11089,7 @@ void test_vst2q_f32(float32_t *a, float32x4x2_t b) { vst2q_f32(a, b); } -// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_f64( // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 @@ -12033,7 +11114,7 @@ void test_vst2q_f64(float64_t *a, float64x2x2_t b) { vst2q_f64(a, b); } -// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // 
CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -12053,7 +11134,7 @@ void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { vst2q_p8(a, b); } -// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -12078,7 +11159,7 @@ void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { vst2q_p16(a, b); } -// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -12098,7 +11179,7 @@ void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { vst2_u8(a, b); } -// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -12123,7 +11204,7 @@ void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { vst2_u16(a, b); } -// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -12148,7 +11229,7 @@ void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { vst2_u32(a, b); } -// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 @@ -12173,7 +11254,7 @@ void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { vst2_u64(a, b); } -// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -12193,7 +11274,7 @@ void test_vst2_s8(int8_t *a, int8x8x2_t b) { vst2_s8(a, b); } -// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -12218,7 +11299,7 @@ void test_vst2_s16(int16_t *a, int16x4x2_t b) { vst2_s16(a, b); } -// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca 
%struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -12243,7 +11324,7 @@ void test_vst2_s32(int32_t *a, int32x2x2_t b) { vst2_s32(a, b); } -// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 @@ -12268,7 +11349,7 @@ void test_vst2_s64(int64_t *a, int64x1x2_t b) { vst2_s64(a, b); } -// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -12293,7 +11374,7 @@ void test_vst2_f16(float16_t *a, float16x4x2_t b) { vst2_f16(a, b); } -// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -12318,7 +11399,7 @@ void test_vst2_f32(float32_t *a, float32x2x2_t b) { vst2_f32(a, b); } -// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_f64( // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 @@ -12343,7 +11424,7 @@ void test_vst2_f64(float64_t *a, float64x1x2_t b) { vst2_f64(a, b); } -// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -12363,7 +11444,7 @@ void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { vst2_p8(a, b); } -// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -12388,7 +11469,7 @@ void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { vst2_p16(a, b); } -// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -12411,7 +11492,7 @@ void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { vst3q_u8(a, b); } -// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u16( // CHECK: [[B:%.*]] = alloca 
%struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 @@ -12441,7 +11522,7 @@ void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { vst3q_u16(a, b); } -// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 @@ -12471,7 +11552,7 @@ void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { vst3q_u32(a, b); } -// CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 @@ -12501,7 +11582,7 @@ void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { vst3q_u64(a, b); } -// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 @@ -12524,7 +11605,7 @@ void test_vst3q_s8(int8_t *a, int8x16x3_t b) { vst3q_s8(a, b); } -// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 @@ -12554,7 +11635,7 @@ void test_vst3q_s16(int16_t *a, int16x8x3_t b) { vst3q_s16(a, b); } -// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 @@ -12584,7 +11665,7 @@ void test_vst3q_s32(int32_t *a, int32x4x3_t b) { vst3q_s32(a, b); } -// CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 @@ -12614,7 +11695,7 @@ void test_vst3q_s64(int64_t *a, int64x2x3_t b) { vst3q_s64(a, b); } -// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 @@ -12644,7 +11725,7 @@ void test_vst3q_f16(float16_t *a, float16x8x3_t b) { vst3q_f16(a, b); } -// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] 
%b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 @@ -12674,7 +11755,7 @@ void test_vst3q_f32(float32_t *a, float32x4x3_t b) { vst3q_f32(a, b); } -// CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_f64( // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0 @@ -12704,7 +11785,7 @@ void test_vst3q_f64(float64_t *a, float64x2x3_t b) { vst3q_f64(a, b); } -// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -12727,7 +11808,7 @@ void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) { vst3q_p8(a, b); } -// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 @@ -12757,7 +11838,7 @@ void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) { vst3q_p16(a, b); } -// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 @@ -12780,7 +11861,7 @@ void test_vst3_u8(uint8_t *a, uint8x8x3_t b) { vst3_u8(a, b); } -// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 @@ -12810,7 +11891,7 @@ void test_vst3_u16(uint16_t *a, uint16x4x3_t b) { vst3_u16(a, b); } -// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 @@ -12840,7 +11921,7 @@ void test_vst3_u32(uint32_t *a, uint32x2x3_t b) { vst3_u32(a, b); } -// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 @@ -12870,7 +11951,7 @@ void test_vst3_u64(uint64_t *a, uint64x1x3_t b) { vst3_u64(a, b); } 
-// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_s8(
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
@@ -12893,7 +11974,7 @@ void test_vst3_s8(int8_t *a, int8x8x3_t b) {
   vst3_s8(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_s16(
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
@@ -12923,7 +12004,7 @@ void test_vst3_s16(int16_t *a, int16x4x3_t b) {
   vst3_s16(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_s32(
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
@@ -12953,7 +12034,7 @@ void test_vst3_s32(int32_t *a, int32x2x3_t b) {
   vst3_s32(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_s64(
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
@@ -12983,7 +12064,7 @@ void test_vst3_s64(int64_t *a, int64x1x3_t b) {
   vst3_s64(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_f16(
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
@@ -13013,7 +12094,7 @@ void test_vst3_f16(float16_t *a, float16x4x3_t b) {
   vst3_f16(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_f32(
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
@@ -13043,7 +12124,7 @@ void test_vst3_f32(float32_t *a, float32x2x3_t b) {
   vst3_f32(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_f64(
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
@@ -13073,7 +12154,7 @@ void test_vst3_f64(float64_t *a, float64x1x3_t b) {
   vst3_f64(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_p8(
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
@@ -13096,7 +12177,7 @@ void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
   vst3_p8(a, b);
 }

-// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst3_p16(
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
@@ -13126,7 +12207,7 @@ void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
   vst3_p16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_u8(
 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@@ -13152,7 +12233,7 @@ void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
   vst4q_u8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_u16(
 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
@@ -13187,7 +12268,7 @@ void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
   vst4q_u16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_u32(
 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
@@ -13222,7 +12303,7 @@ void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
   vst4q_u32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_u64(
 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
@@ -13257,7 +12338,7 @@ void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
   vst4q_u64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_s8(
 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@@ -13283,7 +12364,7 @@ void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
   vst4q_s8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_s16(
 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
@@ -13318,7 +12399,7 @@ void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
   vst4q_s16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_s32(
 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
@@ -13353,7 +12434,7 @@ void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
   vst4q_s32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_s64(
 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
@@ -13388,7 +12469,7 @@ void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
   vst4q_s64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_f16(
 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
@@ -13423,7 +12504,7 @@ void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
   vst4q_f16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_f32(
 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
@@ -13458,7 +12539,7 @@ void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
   vst4q_f32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_f64(
 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
@@ -13493,7 +12574,7 @@ void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
   vst4q_f64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_p8(
 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@@ -13519,7 +12600,7 @@ void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
   vst4q_p8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4q_p16(
 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
@@ -13554,7 +12635,7 @@ void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
   vst4q_p16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_u8(
 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
@@ -13580,7 +12661,7 @@ void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
   vst4_u8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_u16(
 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
@@ -13615,7 +12696,7 @@ void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
   vst4_u16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_u32(
 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
@@ -13650,7 +12731,7 @@ void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
   vst4_u32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_u64(
 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
@@ -13685,7 +12766,7 @@ void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
   vst4_u64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_s8(
 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
@@ -13711,7 +12792,7 @@ void test_vst4_s8(int8_t *a, int8x8x4_t b) {
   vst4_s8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_s16(
 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
@@ -13746,7 +12827,7 @@ void test_vst4_s16(int16_t *a, int16x4x4_t b) {
   vst4_s16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_s32(
 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
@@ -13781,7 +12862,7 @@ void test_vst4_s32(int32_t *a, int32x2x4_t b) {
   vst4_s32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_s64(
 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
@@ -13816,7 +12897,7 @@ void test_vst4_s64(int64_t *a, int64x1x4_t b) {
   vst4_s64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_f16(
 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
@@ -13851,7 +12932,7 @@ void test_vst4_f16(float16_t *a, float16x4x4_t b) {
   vst4_f16(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_f32(
 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
@@ -13886,7 +12967,7 @@ void test_vst4_f32(float32_t *a, float32x2x4_t b) {
   vst4_f32(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_f64(
 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
@@ -13921,7 +13002,7 @@ void test_vst4_f64(float64_t *a, float64x1x4_t b) {
   vst4_f64(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_p8(
 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
@@ -13947,7 +13028,7 @@ void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
   vst4_p8(a, b);
 }

-// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: @test_vst4_p16(
 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
@@ -13982,7 +13063,7 @@ void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
   vst4_p16(a, b);
 }

-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
@@ -13998,7 +13079,7 @@ uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
   return vld1q_u8_x2(a);
 }

-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
@@ -14016,7 +13097,7 @@ uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
   return vld1q_u16_x2(a);
 }

-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
@@ -14034,7 +13115,7 @@ uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
   return vld1q_u32_x2(a);
 }

-// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
@@ -14052,7 +13133,7 @@ uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
   return vld1q_u64_x2(a);
 }

-// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
@@ -14068,7 +13149,7 @@ int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
   return vld1q_s8_x2(a);
 }

-// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
@@ -14086,7 +13167,7 @@ int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
   return vld1q_s16_x2(a);
 }

-// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
@@ -14104,7 +13185,7 @@ int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
   return vld1q_s32_x2(a);
 }

-// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1q_s64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
@@ -14122,7 +13203,7 @@ int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
   return vld1q_s64_x2(a);
 }

-// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
@@ -14140,7 +13221,7 @@ float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
   return vld1q_f16_x2(a);
 }

-// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
@@ -14158,7 +13239,7 @@ float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
   return vld1q_f32_x2(a);
 }

-// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
+// CHECK-LABEL: @test_vld1q_f64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
@@ -14176,7 +13257,7 @@ float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
   return vld1q_f64_x2(a);
 }

-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_p8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
@@ -14192,7 +13273,7 @@ poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
   return vld1q_p8_x2(a);
 }

-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_p16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
@@ -14210,7 +13291,7 @@ poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
   return vld1q_p16_x2(a);
 }

-// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1q_p64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@@ -14228,7 +13309,7 @@ poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
   return vld1q_p64_x2(a);
 }

-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_u8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
@@ -14244,7 +13325,7 @@ uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
   return vld1_u8_x2(a);
 }

-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_u16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
@@ -14262,7 +13343,7 @@ uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
   return vld1_u16_x2(a);
 }

-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1_u32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
@@ -14280,7 +13361,7 @@ uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
   return vld1_u32_x2(a);
 }

-// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1_u64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
@@ -14298,7 +13379,7 @@ uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
   return vld1_u64_x2(a);
 }

-// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_s8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
@@ -14314,7 +13395,7 @@ int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
   return vld1_s8_x2(a);
 }

-// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_s16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
@@ -14332,7 +13413,7 @@ int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
   return vld1_s16_x2(a);
 }

-// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1_s32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
@@ -14350,7 +13431,7 @@ int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
   return vld1_s32_x2(a);
 }

-// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1_s64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
@@ -14368,7 +13449,7 @@ int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
   return vld1_s64_x2(a);
 }

-// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
+// CHECK-LABEL: @test_vld1_f16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
@@ -14386,7 +13467,7 @@ float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
   return vld1_f16_x2(a);
 }

-// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
+// CHECK-LABEL: @test_vld1_f32_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
@@ -14404,7 +13485,7 @@ float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
   return vld1_f32_x2(a);
 }

-// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
+// CHECK-LABEL: @test_vld1_f64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
@@ -14422,7 +13503,7 @@ float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
   return vld1_f64_x2(a);
 }

-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1_p8_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
@@ -14438,7 +13519,7 @@ poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
   return vld1_p8_x2(a);
 }

-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1_p16_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
@@ -14456,7 +13537,7 @@ poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
   return vld1_p16_x2(a);
 }

-// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
+// CHECK-LABEL: @test_vld1_p64_x2(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@@ -14474,7 +13555,7 @@ poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
   return vld1_p64_x2(a);
 }

-// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u8_x3(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
@@ -14490,7 +13571,7 @@ uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
   return vld1q_u8_x3(a);
 }

-// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u16_x3(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
@@ -14508,7 +13589,7 @@ uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
   return vld1q_u16_x3(a);
 }

-// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
+// CHECK-LABEL: @test_vld1q_u32_x3(
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* @@ -14526,7 +13607,7 @@ uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) { return vld1q_u32_x3(a); } -// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_u64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* @@ -14544,7 +13625,7 @@ uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) { return vld1q_u64_x3(a); } -// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_s8_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* @@ -14560,7 +13641,7 @@ int8x16x3_t test_vld1q_s8_x3(int8_t const *a) { return vld1q_s8_x3(a); } -// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_s16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* @@ -14578,7 +13659,7 @@ int16x8x3_t test_vld1q_s16_x3(int16_t const *a) { return vld1q_s16_x3(a); } -// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_s32_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* @@ -14596,7 +13677,7 @@ int32x4x3_t test_vld1q_s32_x3(int32_t const *a) { return vld1q_s32_x3(a); } -// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_s64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* @@ -14614,7 +13695,7 @@ int64x2x3_t test_vld1q_s64_x3(int64_t const *a) { return vld1q_s64_x3(a); } -// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 { +// CHECK-LABEL: @test_vld1q_f16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* @@ -14632,7 +13713,7 @@ float16x8x3_t test_vld1q_f16_x3(float16_t const *a) { return vld1q_f16_x3(a); } -// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 { +// CHECK-LABEL: @test_vld1q_f32_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* @@ -14650,7 +13731,7 @@ float32x4x3_t test_vld1q_f32_x3(float32_t const *a) { return vld1q_f32_x3(a); } -// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 { +// CHECK-LABEL: @test_vld1q_f64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* @@ -14668,7 +13749,7 @@ float64x2x3_t test_vld1q_f64_x3(float64_t const *a) { return vld1q_f64_x3(a); } -// CHECK-LABEL: define 
%struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_p8_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* @@ -14684,7 +13765,7 @@ poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) { return vld1q_p8_x3(a); } -// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_p16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* @@ -14702,7 +13783,7 @@ poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) { return vld1q_p16_x3(a); } -// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_p64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* @@ -14720,7 +13801,7 @@ poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { return vld1q_p64_x3(a); } -// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_u8_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* @@ -14736,7 +13817,7 @@ uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { return vld1_u8_x3(a); } -// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_u16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* @@ -14754,7 +13835,7 @@ uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { return vld1_u16_x3(a); } -// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_u32_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* @@ -14772,7 +13853,7 @@ uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { return vld1_u32_x3(a); } -// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_u64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* @@ -14790,7 +13871,7 @@ uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { return vld1_u64_x3(a); } -// CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_s8_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* @@ -14806,7 +13887,7 @@ int8x8x3_t test_vld1_s8_x3(int8_t const *a) { return vld1_s8_x3(a); } -// CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_s16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* 
[[__RET]] to i8* @@ -14824,7 +13905,7 @@ int16x4x3_t test_vld1_s16_x3(int16_t const *a) { return vld1_s16_x3(a); } -// CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_s32_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* @@ -14842,7 +13923,7 @@ int32x2x3_t test_vld1_s32_x3(int32_t const *a) { return vld1_s32_x3(a); } -// CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_s64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* @@ -14860,7 +13941,7 @@ int64x1x3_t test_vld1_s64_x3(int64_t const *a) { return vld1_s64_x3(a); } -// CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 { +// CHECK-LABEL: @test_vld1_f16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* @@ -14878,7 +13959,7 @@ float16x4x3_t test_vld1_f16_x3(float16_t const *a) { return vld1_f16_x3(a); } -// CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 { +// CHECK-LABEL: @test_vld1_f32_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* @@ -14896,7 +13977,7 @@ float32x2x3_t test_vld1_f32_x3(float32_t const *a) { return vld1_f32_x3(a); } -// CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 { +// CHECK-LABEL: @test_vld1_f64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* @@ -14914,7 +13995,7 @@ float64x1x3_t test_vld1_f64_x3(float64_t const *a) { return vld1_f64_x3(a); } -// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_p8_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* @@ -14930,7 +14011,7 @@ poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { return vld1_p8_x3(a); } -// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_p16_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* @@ -14948,7 +14029,7 @@ poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { return vld1_p16_x3(a); } -// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_p64_x3( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* @@ -14966,7 +14047,7 @@ poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { return vld1_p64_x3(a); } -// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_u8_x4( // CHECK: [[RETVAL:%.*]] = 
alloca %struct.uint8x16x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* @@ -14982,7 +14063,7 @@ uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { return vld1q_u8_x4(a); } -// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_u16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* @@ -15000,7 +14081,7 @@ uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { return vld1q_u16_x4(a); } -// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_u32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* @@ -15018,7 +14099,7 @@ uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { return vld1q_u32_x4(a); } -// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_u64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* @@ -15036,7 +14117,7 @@ uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { return vld1q_u64_x4(a); } -// CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_s8_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* @@ -15052,7 +14133,7 @@ int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { return vld1q_s8_x4(a); } -// CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_s16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* @@ -15070,7 +14151,7 @@ int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { return vld1q_s16_x4(a); } -// CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_s32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* @@ -15088,7 +14169,7 @@ int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { return vld1q_s32_x4(a); } -// CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_s64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* @@ -15106,7 +14187,7 @@ int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { return vld1q_s64_x4(a); } -// CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 { +// CHECK-LABEL: @test_vld1q_f16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* @@ -15124,7 +14205,7 @@ float16x8x4_t 
test_vld1q_f16_x4(float16_t const *a) { return vld1q_f16_x4(a); } -// CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 { +// CHECK-LABEL: @test_vld1q_f32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* @@ -15142,7 +14223,7 @@ float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { return vld1q_f32_x4(a); } -// CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 { +// CHECK-LABEL: @test_vld1q_f64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* @@ -15160,7 +14241,7 @@ float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { return vld1q_f64_x4(a); } -// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_p8_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* @@ -15176,7 +14257,7 @@ poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { return vld1q_p8_x4(a); } -// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_p16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* @@ -15194,7 +14275,7 @@ poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { return vld1q_p16_x4(a); } -// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_p64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* @@ -15212,7 +14293,7 @@ poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { return vld1q_p64_x4(a); } -// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_u8_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* @@ -15228,7 +14309,7 @@ uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { return vld1_u8_x4(a); } -// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_u16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* @@ -15246,7 +14327,7 @@ uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { return vld1_u16_x4(a); } -// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_u32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* @@ -15264,7 +14345,7 @@ uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { return vld1_u32_x4(a); } -// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_u64_x4( // CHECK: [[RETVAL:%.*]] = alloca 
%struct.uint64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* @@ -15282,7 +14363,7 @@ uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { return vld1_u64_x4(a); } -// CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_s8_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* @@ -15298,7 +14379,7 @@ int8x8x4_t test_vld1_s8_x4(int8_t const *a) { return vld1_s8_x4(a); } -// CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_s16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* @@ -15316,7 +14397,7 @@ int16x4x4_t test_vld1_s16_x4(int16_t const *a) { return vld1_s16_x4(a); } -// CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_s32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* @@ -15334,7 +14415,7 @@ int32x2x4_t test_vld1_s32_x4(int32_t const *a) { return vld1_s32_x4(a); } -// CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_s64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* @@ -15352,7 +14433,7 @@ int64x1x4_t test_vld1_s64_x4(int64_t const *a) { return vld1_s64_x4(a); } -// CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 { +// CHECK-LABEL: @test_vld1_f16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* @@ -15370,7 +14451,7 @@ float16x4x4_t test_vld1_f16_x4(float16_t const *a) { return vld1_f16_x4(a); } -// CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 { +// CHECK-LABEL: @test_vld1_f32_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* @@ -15388,7 +14469,7 @@ float32x2x4_t test_vld1_f32_x4(float32_t const *a) { return vld1_f32_x4(a); } -// CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 { +// CHECK-LABEL: @test_vld1_f64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* @@ -15406,7 +14487,7 @@ float64x1x4_t test_vld1_f64_x4(float64_t const *a) { return vld1_f64_x4(a); } -// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_p8_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* @@ -15422,7 +14503,7 @@ poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { return vld1_p8_x4(a); } -// 
CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_p16_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* @@ -15440,7 +14521,7 @@ poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { return vld1_p16_x4(a); } -// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_p64_x4( // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* @@ -15458,7 +14539,7 @@ poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { return vld1_p64_x4(a); } -// CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u8_x2( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -15478,7 +14559,7 @@ void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { vst1q_u8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u16_x2( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -15504,7 +14585,7 @@ void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { vst1q_u16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u32_x2( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -15530,7 +14611,7 @@ void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { vst1q_u32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u64_x2( // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 @@ -15556,7 +14637,7 @@ void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { vst1q_u64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s8_x2( // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -15576,7 +14657,7 @@ void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { vst1q_s8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s16_x2( // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -15602,7 +14683,7 @@ void 
test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { vst1q_s16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s32_x2( // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -15628,7 +14709,7 @@ void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { vst1q_s32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s64_x2( // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 @@ -15654,7 +14735,7 @@ void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { vst1q_s64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f16_x2( // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -15680,7 +14761,7 @@ void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { vst1q_f16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f32_x2( // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -15706,7 +14787,7 @@ void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { vst1q_f32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f64_x2( // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 @@ -15732,7 +14813,7 @@ void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) { vst1q_f64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_p8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p8_x2( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -15752,7 +14833,7 @@ void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { vst1q_p8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p16_x2( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -15778,7 +14859,7 @@ void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) { vst1q_p16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p64_x2( // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: 
[[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0 @@ -15804,7 +14885,7 @@ void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { vst1q_p64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u8_x2( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -15824,7 +14905,7 @@ void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { vst1_u8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u16_x2( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -15850,7 +14931,7 @@ void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { vst1_u16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u32_x2( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -15876,7 +14957,7 @@ void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { vst1_u32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u64_x2( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 @@ -15902,7 +14983,7 @@ void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { vst1_u64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s8_x2( // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -15922,7 +15003,7 @@ void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { vst1_s8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s16_x2( // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -15948,7 +15029,7 @@ void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) { vst1_s16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s32_x2( // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -15974,7 +15055,7 @@ void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { vst1_s32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 
+// CHECK-LABEL: @test_vst1_s64_x2( // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 @@ -16000,7 +15081,7 @@ void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { vst1_s64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f16_x2( // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -16026,7 +15107,7 @@ void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { vst1_f16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f32_x2( // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -16052,7 +15133,7 @@ void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { vst1_f32_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f64_x2( // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 @@ -16078,7 +15159,7 @@ void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { vst1_f64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p8_x2( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -16098,7 +15179,7 @@ void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { vst1_p8_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p16_x2( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -16124,7 +15205,7 @@ void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) { vst1_p16_x2(a, b); } -// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p64_x2( // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0 @@ -16150,7 +15231,7 @@ void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { vst1_p64_x2(a, b); } -// CHECK-LABEL: define void @test_vst1q_u8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u8_x3( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -16173,7 +15254,7 @@ void 
test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { vst1q_u8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u16_x3( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 @@ -16204,7 +15285,7 @@ void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { vst1q_u16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u32_x3( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 @@ -16235,7 +15316,7 @@ void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { vst1q_u32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u64_x3( // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 @@ -16266,7 +15347,7 @@ void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { vst1q_u64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s8_x3( // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 @@ -16289,7 +15370,7 @@ void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { vst1q_s8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s16_x3( // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 @@ -16320,7 +15401,7 @@ void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { vst1q_s16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s32_x3( // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 @@ -16351,7 +15432,7 @@ void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { vst1q_s32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s64_x3( // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 @@ -16382,7 +15463,7 @@ void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { vst1q_s64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f16_x3( // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca 
%struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 @@ -16413,7 +15494,7 @@ void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { vst1q_f16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f32_x3( // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 @@ -16444,7 +15525,7 @@ void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { vst1q_f32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f64_x3( // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0 @@ -16475,7 +15556,7 @@ void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { vst1q_f64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p8_x3( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -16498,7 +15579,7 @@ void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { vst1q_p8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p16_x3( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 @@ -16529,7 +15610,7 @@ void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { vst1q_p16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p64_x3( // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0 @@ -16560,7 +15641,7 @@ void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { vst1q_p64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u8_x3( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 @@ -16583,7 +15664,7 @@ void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { vst1_u8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u16_x3( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 @@ -16614,7 +15695,7 @@ void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { vst1_u16_x3(a, b); } -// CHECK-LABEL: define void 
@test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u32_x3( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 @@ -16645,7 +15726,7 @@ void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { vst1_u32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u64_x3( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 @@ -16676,7 +15757,7 @@ void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { vst1_u64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s8_x3( // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 @@ -16699,7 +15780,7 @@ void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { vst1_s8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s16_x3( // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 @@ -16730,7 +15811,7 @@ void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { vst1_s16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s32_x3( // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 @@ -16761,7 +15842,7 @@ void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { vst1_s32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s64_x3( // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 @@ -16792,7 +15873,7 @@ void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { vst1_s64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f16_x3( // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 @@ -16823,7 +15904,7 @@ void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { vst1_f16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f32_x3( // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 @@ 
-16854,7 +15935,7 @@ void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { vst1_f32_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f64_x3( // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0 @@ -16885,7 +15966,7 @@ void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { vst1_f64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p8_x3( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 @@ -16908,7 +15989,7 @@ void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { vst1_p8_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p16_x3( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 @@ -16939,7 +16020,7 @@ void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { vst1_p16_x3(a, b); } -// CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p64_x3( // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0 @@ -16970,7 +16051,7 @@ void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { vst1_p64_x3(a, b); } -// CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u8_x4( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 @@ -16996,7 +16077,7 @@ void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { vst1q_u8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u16_x4( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 @@ -17032,7 +16113,7 @@ void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { vst1q_u16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u32_x4( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 @@ -17068,7 +16149,7 @@ void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { vst1q_u32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_u64_x4( // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = 
alloca %struct.uint64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0 @@ -17104,7 +16185,7 @@ void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { vst1q_u64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s8_x4( // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 @@ -17130,7 +16211,7 @@ void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { vst1q_s8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s16_x4( // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 @@ -17166,7 +16247,7 @@ void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { vst1q_s16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s32_x4( // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 @@ -17202,7 +16283,7 @@ void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { vst1q_s32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_s64_x4( // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0 @@ -17238,7 +16319,7 @@ void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { vst1q_s64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f16_x4( // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 @@ -17274,7 +16355,7 @@ void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { vst1q_f16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f32_x4( // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 @@ -17310,7 +16391,7 @@ void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { vst1q_f32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_f64_x4( // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0 @@ -17346,7 +16427,7 @@ void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { vst1q_f64_x4(a, b); } -// CHECK-LABEL: 
define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p8_x4( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 @@ -17372,7 +16453,7 @@ void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { vst1q_p8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p16_x4( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 @@ -17408,7 +16489,7 @@ void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { vst1q_p16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1q_p64_x4( // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0 @@ -17444,7 +16525,7 @@ void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { vst1q_p64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u8_x4( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 @@ -17470,7 +16551,7 @@ void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { vst1_u8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u16_x4( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 @@ -17506,7 +16587,7 @@ void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { vst1_u16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u32_x4( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 @@ -17542,7 +16623,7 @@ void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { vst1_u32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_u64_x4( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 @@ -17578,7 +16659,7 @@ void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { vst1_u64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s8_x4( // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* 
[[B]], i32 0, i32 0 @@ -17604,7 +16685,7 @@ void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { vst1_s8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s16_x4( // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 @@ -17640,7 +16721,7 @@ void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { vst1_s16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s32_x4( // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 @@ -17676,7 +16757,7 @@ void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { vst1_s32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_s64_x4( // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 @@ -17712,7 +16793,7 @@ void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { vst1_s64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_f16_x4(half* %a, [4 x <4 x half>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f16_x4( // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 @@ -17748,7 +16829,7 @@ void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { vst1_f16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f32_x4( // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 @@ -17784,7 +16865,7 @@ void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { vst1_f32_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_f64_x4( // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 @@ -17820,7 +16901,7 @@ void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { vst1_f64_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p8_x4( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 @@ -17846,7 +16927,7 @@ void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { vst1_p8_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p16_x4( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = 
alloca %struct.poly16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 @@ -17882,7 +16963,7 @@ void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { vst1_p16_x4(a, b); } -// CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { +// CHECK-LABEL: @test_vst1_p64_x4( // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0 @@ -17918,7 +16999,7 @@ void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { vst1_p64_x4(a, b); } -// CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vceqd_s64( // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -17926,7 +17007,7 @@ int64_t test_vceqd_s64(int64_t a, int64_t b) { return (int64_t)vceqd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vceqd_u64( // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -17934,7 +17015,7 @@ uint64_t test_vceqd_u64(uint64_t a, uint64_t b) { return (int64_t)vceqd_u64(a, b); } -// CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vceqzd_s64( // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQZ_I]] @@ -17942,7 +17023,7 @@ int64_t test_vceqzd_s64(int64_t a) { return (int64_t)vceqzd_s64(a); } -// CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vceqzd_u64( // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQZD_I]] @@ -17950,7 +17031,7 @@ int64_t test_vceqzd_u64(int64_t a) { return (int64_t)vceqzd_u64(a); } -// CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcged_s64( // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -17958,15 +17039,15 @@ int64_t test_vcged_s64(int64_t a, int64_t b) { return (int64_t)vcged_s64(a, b); } -// CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcged_u64( // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] uint64_t test_vcged_u64(uint64_t a, uint64_t b) { - return (uint64_t)vcged_u64(a, b); + return (uint64_t)vcged_u64(a, b); } -// CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vcgezd_s64( // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCGEZ_I]] @@ -17974,7 +17055,7 @@ int64_t test_vcgezd_s64(int64_t a) { return (int64_t)vcgezd_s64(a); } -// CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcgtd_s64( // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -17982,7 +17063,7 @@ int64_t test_vcgtd_s64(int64_t a, int64_t b) { return (int64_t)vcgtd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcgtd_u64( // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 
[[VCEQD_I]] @@ -17990,7 +17071,7 @@ uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) { return (uint64_t)vcgtd_u64(a, b); } -// CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vcgtzd_s64( // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCGTZ_I]] @@ -17998,7 +17079,7 @@ int64_t test_vcgtzd_s64(int64_t a) { return (int64_t)vcgtzd_s64(a); } -// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcled_s64( // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -18006,7 +17087,7 @@ int64_t test_vcled_s64(int64_t a, int64_t b) { return (int64_t)vcled_s64(a, b); } -// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcled_u64( // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -18014,7 +17095,7 @@ uint64_t test_vcled_u64(uint64_t a, uint64_t b) { return (uint64_t)vcled_u64(a, b); } -// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vclezd_s64( // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCLEZ_I]] @@ -18022,7 +17103,7 @@ int64_t test_vclezd_s64(int64_t a) { return (int64_t)vclezd_s64(a); } -// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcltd_s64( // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -18030,7 +17111,7 @@ int64_t test_vcltd_s64(int64_t a, int64_t b) { return (int64_t)vcltd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vcltd_u64( // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQD_I]] @@ -18038,7 +17119,7 @@ uint64_t test_vcltd_u64(uint64_t a, uint64_t b) { return (uint64_t)vcltd_u64(a, b); } -// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vcltzd_s64( // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCLTZ_I]] @@ -18046,7 +17127,7 @@ int64_t test_vcltzd_s64(int64_t a) { return (int64_t)vcltzd_s64(a); } -// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vtstd_s64( // CHECK: [[TMP0:%.*]] = and i64 %a, %b // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 @@ -18055,7 +17136,7 @@ int64_t test_vtstd_s64(int64_t a, int64_t b) { return (int64_t)vtstd_s64(a, b); } -// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vtstd_u64( // CHECK: [[TMP0:%.*]] = and i64 %a, %b // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 @@ -18064,14 +17145,14 @@ uint64_t test_vtstd_u64(uint64_t a, uint64_t b) { return (uint64_t)vtstd_u64(a, b); } -// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vabsd_s64( // CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4 // CHECK: ret i64 [[VABSD_S64_I]] int64_t test_vabsd_s64(int64_t a) { return (int64_t)vabsd_s64(a); } -// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 { +// CHECK-LABEL: @test_vqabsb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[VQABSB_S8_I:%.*]] 
= call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0 @@ -18080,7 +17161,7 @@ int8_t test_vqabsb_s8(int8_t a) { return (int8_t)vqabsb_s8(a); } -// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqabsh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0 @@ -18089,28 +17170,28 @@ int16_t test_vqabsh_s16(int16_t a) { return (int16_t)vqabsh_s16(a); } -// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqabss_s32( // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4 // CHECK: ret i32 [[VQABSS_S32_I]] int32_t test_vqabss_s32(int32_t a) { return (int32_t)vqabss_s32(a); } -// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqabsd_s64( // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4 // CHECK: ret i64 [[VQABSD_S64_I]] int64_t test_vqabsd_s64(int64_t a) { return (int64_t)vqabsd_s64(a); } -// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vnegd_s64( // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a // CHECK: ret i64 [[VNEGD_I]] int64_t test_vnegd_s64(int64_t a) { return (int64_t)vnegd_s64(a); } -// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 { +// CHECK-LABEL: @test_vqnegb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0 @@ -18119,7 +17200,7 @@ int8_t test_vqnegb_s8(int8_t a) { return (int8_t)vqnegb_s8(a); } -// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqnegh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0 @@ -18128,21 +17209,21 @@ int16_t test_vqnegh_s16(int16_t a) { return (int16_t)vqnegh_s16(a); } -// CHECK-LABEL: define i32 @test_vqnegs_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqnegs_s32( // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4 // CHECK: ret i32 [[VQNEGS_S32_I]] int32_t test_vqnegs_s32(int32_t a) { return (int32_t)vqnegs_s32(a); } -// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqnegd_s64( // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4 // CHECK: ret i64 [[VQNEGD_S64_I]] int64_t test_vqnegd_s64(int64_t a) { return (int64_t)vqnegd_s64(a); } -// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 { +// CHECK-LABEL: @test_vuqaddb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 @@ -18152,7 +17233,7 @@ int8_t test_vuqaddb_s8(int8_t a, int8_t b) { return (int8_t)vuqaddb_s8(a, b); } -// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 { +// CHECK-LABEL: @test_vuqaddh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, 
i16 %b, i64 0 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -18162,21 +17243,21 @@ int16_t test_vuqaddh_s16(int16_t a, int16_t b) { return (int16_t)vuqaddh_s16(a, b); } -// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 { +// CHECK-LABEL: @test_vuqadds_s32( // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4 // CHECK: ret i32 [[VUQADDS_S32_I]] int32_t test_vuqadds_s32(int32_t a, int32_t b) { return (int32_t)vuqadds_s32(a, b); } -// CHECK-LABEL: define i64 @test_vuqaddd_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vuqaddd_s64( // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4 // CHECK: ret i64 [[VUQADDD_S64_I]] int64_t test_vuqaddd_s64(int64_t a, int64_t b) { return (int64_t)vuqaddd_s64(a, b); } -// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 { +// CHECK-LABEL: @test_vsqaddb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 @@ -18186,7 +17267,7 @@ uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { return (uint8_t)vsqaddb_u8(a, b); } -// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 { +// CHECK-LABEL: @test_vsqaddh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -18196,21 +17277,21 @@ uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { return (uint16_t)vsqaddh_u16(a, b); } -// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 { +// CHECK-LABEL: @test_vsqadds_u32( // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4 // CHECK: ret i32 [[VSQADDS_U32_I]] uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) { return (uint32_t)vsqadds_u32(a, b); } -// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsqaddd_u64( // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4 // CHECK: ret i64 [[VSQADDD_U64_I]] uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { return (uint64_t)vsqaddd_u64(a, b); } -// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 { +// CHECK-LABEL: @test_vqdmlalh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -18218,11 +17299,10 @@ uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4 // CHECK: ret i32 [[VQDMLXL1_I]] int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { - return (int32_t)vqdmlalh_s16(a, b, c); } -// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 { +// CHECK-LABEL: @test_vqdmlals_s32( // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 // CHECK: ret i64 [[VQDMLXL1_I]] @@ -18230,7 +17310,7 @@ int64_t test_vqdmlals_s32(int64_t a, 
int32_t b, int32_t c) { return (int64_t)vqdmlals_s32(a, b, c); } -// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 { +// CHECK-LABEL: @test_vqdmlslh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -18238,11 +17318,10 @@ int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4 // CHECK: ret i32 [[VQDMLXL1_I]] int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { - return (int32_t)vqdmlslh_s16(a, b, c); } -// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 { +// CHECK-LABEL: @test_vqdmlsls_s32( // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 // CHECK: ret i64 [[VQDMLXL1_I]] @@ -18250,7 +17329,7 @@ int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { return (int64_t)vqdmlsls_s32(a, b, c); } -// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 { +// CHECK-LABEL: @test_vqdmullh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 @@ -18260,14 +17339,14 @@ int32_t test_vqdmullh_s16(int16_t a, int16_t b) { return (int32_t)vqdmullh_s16(a, b); } -// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqdmulls_s32( // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4 // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t test_vqdmulls_s32(int32_t a, int32_t b) { return (int64_t)vqdmulls_s32(a, b); } -// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqmovunh_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0 @@ -18276,7 +17355,7 @@ int8_t test_vqmovunh_s16(int16_t a) { return (int8_t)vqmovunh_s16(a); } -// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqmovuns_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0 @@ -18285,14 +17364,14 @@ int16_t test_vqmovuns_s32(int32_t a) { return (int16_t)vqmovuns_s32(a); } -// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqmovund_s64( // CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4 // CHECK: ret i32 [[VQMOVUND_S64_I]] int32_t test_vqmovund_s64(int64_t a) { return (int32_t)vqmovund_s64(a); } -// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqmovnh_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> 
[[VQMOVNH_S16_I]], i64 0 @@ -18301,7 +17380,7 @@ int8_t test_vqmovnh_s16(int16_t a) { return (int8_t)vqmovnh_s16(a); } -// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqmovns_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0 @@ -18310,14 +17389,14 @@ int16_t test_vqmovns_s32(int32_t a) { return (int16_t)vqmovns_s32(a); } -// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqmovnd_s64( // CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4 // CHECK: ret i32 [[VQMOVND_S64_I]] int32_t test_vqmovnd_s64(int64_t a) { return (int32_t)vqmovnd_s64(a); } -// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 { +// CHECK-LABEL: @test_vqmovnh_u16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0 @@ -18326,7 +17405,7 @@ int8_t test_vqmovnh_u16(int16_t a) { return (int8_t)vqmovnh_u16(a); } -// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vqmovns_u32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0 @@ -18335,14 +17414,14 @@ int16_t test_vqmovns_u32(int32_t a) { return (int16_t)vqmovns_u32(a); } -// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vqmovnd_u64( // CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4 // CHECK: ret i32 [[VQMOVND_U64_I]] int32_t test_vqmovnd_u64(int64_t a) { return (int32_t)vqmovnd_u64(a); } -// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vceqs_f32( // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCMPD_I]] @@ -18350,7 +17429,7 @@ uint32_t test_vceqs_f32(float32_t a, float32_t b) { return (uint32_t)vceqs_f32(a, b); } -// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vceqd_f64( // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCMPD_I]] @@ -18358,7 +17437,7 @@ uint64_t test_vceqd_f64(float64_t a, float64_t b) { return (uint64_t)vceqd_f64(a, b); } -// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 { +// CHECK-LABEL: @test_vceqzs_f32( // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCEQZ_I]] @@ -18366,7 +17445,7 @@ uint32_t test_vceqzs_f32(float32_t a) { return (uint32_t)vceqzs_f32(a); } -// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 { +// CHECK-LABEL: @test_vceqzd_f64( // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCEQZ_I]] @@ -18374,7 +17453,7 @@ uint64_t test_vceqzd_f64(float64_t a) { return (uint64_t)vceqzd_f64(a); } -// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcges_f32( // CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b // CHECK: 
[[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCMPD_I]] @@ -18382,7 +17461,7 @@ uint32_t test_vcges_f32(float32_t a, float32_t b) { return (uint32_t)vcges_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcged_f64( // CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCMPD_I]] @@ -18390,7 +17469,7 @@ uint64_t test_vcged_f64(float64_t a, float64_t b) { return (uint64_t)vcged_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 { +// CHECK-LABEL: @test_vcgezs_f32( // CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCGEZ_I]] @@ -18398,7 +17477,7 @@ uint32_t test_vcgezs_f32(float32_t a) { return (uint32_t)vcgezs_f32(a); } -// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 { +// CHECK-LABEL: @test_vcgezd_f64( // CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCGEZ_I]] @@ -18406,7 +17485,7 @@ uint64_t test_vcgezd_f64(float64_t a) { return (uint64_t)vcgezd_f64(a); } -// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcgts_f32( // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCMPD_I]] @@ -18414,7 +17493,7 @@ uint32_t test_vcgts_f32(float32_t a, float32_t b) { return (uint32_t)vcgts_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcgtd_f64( // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCMPD_I]] @@ -18422,7 +17501,7 @@ uint64_t test_vcgtd_f64(float64_t a, float64_t b) { return (uint64_t)vcgtd_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 { +// CHECK-LABEL: @test_vcgtzs_f32( // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCGTZ_I]] @@ -18430,7 +17509,7 @@ uint32_t test_vcgtzs_f32(float32_t a) { return (uint32_t)vcgtzs_f32(a); } -// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 { +// CHECK-LABEL: @test_vcgtzd_f64( // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCGTZ_I]] @@ -18438,7 +17517,7 @@ uint64_t test_vcgtzd_f64(float64_t a) { return (uint64_t)vcgtzd_f64(a); } -// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcles_f32( // CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCMPD_I]] @@ -18446,7 +17525,7 @@ uint32_t test_vcles_f32(float32_t a, float32_t b) { return (uint32_t)vcles_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcled_f64( // CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCMPD_I]] @@ -18454,7 +17533,7 @@ uint64_t test_vcled_f64(float64_t a, float64_t b) { return (uint64_t)vcled_f64(a, b); } -// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 { +// CHECK-LABEL: @test_vclezs_f32( // CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCLEZ_I]] @@ -18462,7 +17541,7 @@ uint32_t test_vclezs_f32(float32_t a) { return 
(uint32_t)vclezs_f32(a); } -// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 { +// CHECK-LABEL: @test_vclezd_f64( // CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCLEZ_I]] @@ -18470,7 +17549,7 @@ uint64_t test_vclezd_f64(float64_t a) { return (uint64_t)vclezd_f64(a); } -// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vclts_f32( // CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCMPD_I]] @@ -18478,7 +17557,7 @@ uint32_t test_vclts_f32(float32_t a, float32_t b) { return (uint32_t)vclts_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcltd_f64( // CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCMPD_I]] @@ -18486,7 +17565,7 @@ uint64_t test_vcltd_f64(float64_t a, float64_t b) { return (uint64_t)vcltd_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 { +// CHECK-LABEL: @test_vcltzs_f32( // CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32 // CHECK: ret i32 [[VCLTZ_I]] @@ -18494,7 +17573,7 @@ uint32_t test_vcltzs_f32(float32_t a) { return (uint32_t)vcltzs_f32(a); } -// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 { +// CHECK-LABEL: @test_vcltzd_f64( // CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // CHECK: ret i64 [[VCLTZ_I]] @@ -18502,70 +17581,70 @@ uint64_t test_vcltzd_f64(float64_t a) { return (uint64_t)vcltzd_f64(a); } -// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcages_f32( // CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4 // CHECK: ret i32 [[VCAGES_F32_I]] uint32_t test_vcages_f32(float32_t a, float32_t b) { return (uint32_t)vcages_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcaged_f64( // CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4 // CHECK: ret i64 [[VCAGED_F64_I]] uint64_t test_vcaged_f64(float64_t a, float64_t b) { return (uint64_t)vcaged_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcagts_f32( // CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4 // CHECK: ret i32 [[VCAGTS_F32_I]] uint32_t test_vcagts_f32(float32_t a, float32_t b) { return (uint32_t)vcagts_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcagtd_f64( // CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4 // CHECK: ret i64 [[VCAGTD_F64_I]] uint64_t test_vcagtd_f64(float64_t a, float64_t b) { return (uint64_t)vcagtd_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcales_f32( // CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4 // CHECK: ret i32 [[VCALES_F32_I]] uint32_t test_vcales_f32(float32_t a, float32_t b) { return (uint32_t)vcales_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcaled_f64( // CHECK: [[VCALED_F64_I:%.*]] = call i64 
@llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4 // CHECK: ret i64 [[VCALED_F64_I]] uint64_t test_vcaled_f64(float64_t a, float64_t b) { return (uint64_t)vcaled_f64(a, b); } -// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 { +// CHECK-LABEL: @test_vcalts_f32( // CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4 // CHECK: ret i32 [[VCALTS_F32_I]] uint32_t test_vcalts_f32(float32_t a, float32_t b) { return (uint32_t)vcalts_f32(a, b); } -// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 { +// CHECK-LABEL: @test_vcaltd_f64( // CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4 // CHECK: ret i64 [[VCALTD_F64_I]] uint64_t test_vcaltd_f64(float64_t a, float64_t b) { return (uint64_t)vcaltd_f64(a, b); } -// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vshrd_n_s64( // CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1 // CHECK: ret i64 [[SHRD_N]] int64_t test_vshrd_n_s64(int64_t a) { return (int64_t)vshrd_n_s64(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1> @@ -18574,22 +17653,20 @@ int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); } -// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vshrd_n_u64( // CHECK: ret i64 0 uint64_t test_vshrd_n_u64(uint64_t a) { - return (uint64_t)vshrd_n_u64(a, 64); } -// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 { +// CHECK-LABEL: @test_vshrd_n_u64_2( // CHECK: ret i64 0 uint64_t test_vshrd_n_u64_2() { - uint64_t a = UINT64_C(0xf000000000000000); return vshrd_n_u64(a, 64); } -// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1> @@ -18598,14 +17675,14 @@ uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); } -// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vrshrd_n_s64( // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63) // CHECK: ret i64 [[VRSHR_N]] int64_t test_vrshrd_n_s64(int64_t a) { return (int64_t)vrshrd_n_s64(a, 63); } -// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) @@ -18614,14 +17691,14 @@ int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); } -// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vrshrd_n_u64( // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63) // CHECK: ret i64 [[VRSHR_N]] uint64_t test_vrshrd_n_u64(uint64_t a) { return (uint64_t)vrshrd_n_u64(a, 63); } -// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64>
@llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) @@ -18630,7 +17707,7 @@ uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); } -// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsrad_n_s64( // CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] // CHECK: ret i64 [[TMP0]] @@ -18638,7 +17715,7 @@ int64_t test_vsrad_n_s64(int64_t a, int64_t b) { return (int64_t)vsrad_n_s64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsra_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -18650,7 +17727,7 @@ int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { return vsra_n_s64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsrad_n_u64( // CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] // CHECK: ret i64 [[TMP0]] @@ -18658,14 +17735,13 @@ uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vsrad_n_u64(a, b, 63); } -// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsrad_n_u64_2( // CHECK: ret i64 %a uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) { - return (uint64_t)vsrad_n_u64(a, b, 64); } -// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsra_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -18677,7 +17753,7 @@ uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { return vsra_n_u64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vrsrad_n_s64( // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63) // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]] // CHECK: ret i64 [[TMP1]] @@ -18685,7 +17761,7 @@ int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { return (int64_t)vrsrad_n_s64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18697,7 +17773,7 @@ int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { return vrsra_n_s64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vrsrad_n_u64( // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63) // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]] // CHECK: ret i64 [[TMP1]] @@ -18705,7 +17781,7 @@ uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vrsrad_n_u64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18717,13 +17793,14 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { return vrsra_n_u64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 { +// CHECK-LABEL:
@test_vshld_n_s64( // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1 // CHECK: ret i64 [[SHLD_N]] int64_t test_vshld_n_s64(int64_t a) { return (int64_t)vshld_n_s64(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 { + +// CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1> @@ -18732,14 +17809,14 @@ int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); } -// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vshld_n_u64( // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63 // CHECK: ret i64 [[SHLD_N]] uint64_t test_vshld_n_u64(uint64_t a) { return (uint64_t)vshld_n_u64(a, 63); } -// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1> @@ -18748,7 +17825,7 @@ uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); } -// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 { +// CHECK-LABEL: @test_vqshlb_n_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0 @@ -18757,7 +17834,7 @@ int8_t test_vqshlb_n_s8(int8_t a) { return (int8_t)vqshlb_n_s8(a, 7); } -// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshlh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 15, i16 15, i16 15>) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0 @@ -18766,35 +17843,35 @@ int16_t test_vqshlh_n_s16(int16_t a) { return (int16_t)vqshlh_n_s16(a, 15); } -// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshls_n_s32( // CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31) // CHECK: ret i32 [[VQSHLS_N_S32]] int32_t test_vqshls_n_s32(int32_t a) { return (int32_t)vqshls_n_s32(a, 31); } -// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshld_n_s64( // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63) // CHECK: ret i64 [[VQSHL_N]] int64_t test_vqshld_n_s64(int64_t a) { return (int64_t)vqshld_n_s64(a, 63); } -// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s8( // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) // CHECK: ret <8 x i8> [[VQSHL_N]] int8x8_t test_vqshl_n_s8(int8x8_t a) { return vqshl_n_s8(a, 0); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s8( // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) // CHECK: ret <16 x i8> [[VQSHL_N]] int8x16_t test_vqshlq_n_s8(int8x16_t a) { return vqshlq_n_s8(a, 0); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16>
@llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) @@ -18803,7 +17880,7 @@ int16x4_t test_vqshl_n_s16(int16x4_t a) { return vqshl_n_s16(a, 0); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) @@ -18812,7 +17889,7 @@ int16x8_t test_vqshlq_n_s16(int16x8_t a) { return vqshlq_n_s16(a, 0); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) @@ -18821,7 +17898,7 @@ int32x2_t test_vqshl_n_s32(int32x2_t a) { return vqshl_n_s32(a, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) @@ -18830,7 +17907,7 @@ int32x4_t test_vqshlq_n_s32(int32x4_t a) { return vqshlq_n_s32(a, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) @@ -18839,21 +17916,21 @@ int64x2_t test_vqshlq_n_s64(int64x2_t a) { return vqshlq_n_s64(a, 0); } -// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u8( // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) // CHECK: ret <8 x i8> [[VQSHL_N]] uint8x8_t test_vqshl_n_u8(uint8x8_t a) { return vqshl_n_u8(a, 0); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u8( // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) // CHECK: ret <16 x i8> [[VQSHL_N]] uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { return vqshlq_n_u8(a, 0); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) @@ -18862,7 +17939,7 @@ uint16x4_t test_vqshl_n_u16(uint16x4_t a) { return vqshl_n_u16(a, 0); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) @@ -18871,7 +17948,7 @@ uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { return vqshlq_n_u16(a, 
0); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) @@ -18880,7 +17957,7 @@ uint32x2_t test_vqshl_n_u32(uint32x2_t a) { return vqshl_n_u32(a, 0); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) @@ -18889,7 +17966,7 @@ uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { return vqshlq_n_u32(a, 0); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) @@ -18898,7 +17975,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { return vqshlq_n_u64(a, 0); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) @@ -18907,7 +17984,7 @@ int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); } -// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 { +// CHECK-LABEL: @test_vqshlb_n_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0 @@ -18916,7 +17993,7 @@ uint8_t test_vqshlb_n_u8(uint8_t a) { return (uint8_t)vqshlb_n_u8(a, 7); } -// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshlh_n_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 15, i16 15, i16 15>) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0 @@ -18925,21 +18002,21 @@ uint16_t test_vqshlh_n_u16(uint16_t a) { return (uint16_t)vqshlh_n_u16(a, 15); } -// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshls_n_u32( // CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31) // CHECK: ret i32 [[VQSHLS_N_U32]] uint32_t test_vqshls_n_u32(uint32_t a) { return (uint32_t)vqshls_n_u32(a, 31); } -// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshld_n_u64( // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63) // CHECK: ret i64 [[VQSHL_N]] uint64_t test_vqshld_n_u64(uint64_t a) { return (uint64_t)vqshld_n_u64(a, 63); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK:
[[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) @@ -18948,7 +18025,7 @@ uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); } -// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 { +// CHECK-LABEL: @test_vqshlub_n_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0 @@ -18957,7 +18034,7 @@ int8_t test_vqshlub_n_s8(int8_t a) { return (int8_t)vqshlub_n_s8(a, 7); } -// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshluh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 15, i16 15, i16 15>) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0 @@ -18966,21 +18043,21 @@ int16_t test_vqshluh_n_s16(int16_t a) { return (int16_t)vqshluh_n_s16(a, 15); } -// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshlus_n_s32( // CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31) // CHECK: ret i32 [[VQSHLUS_N_S32]] int32_t test_vqshlus_n_s32(int32_t a) { return (int32_t)vqshlus_n_s32(a, 31); } -// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshlud_n_s64( // CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63) // CHECK: ret i64 [[VQSHLU_N]] int64_t test_vqshlud_n_s64(int64_t a) { return (int64_t)vqshlud_n_s64(a, 63); } -// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>) @@ -18989,7 +18066,7 @@ uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); } -// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsrid_n_s64( // CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> // CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63) @@ -18999,7 +18076,7 @@ int64_t test_vsrid_n_s64(int64_t a, int64_t b) { return (int64_t)vsrid_n_s64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsri_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -19010,7 +18087,7 @@ int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { return vsri_n_s64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vsrid_n_u64( // CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> // CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63) @@ -19020,7 +18097,7 @@ uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vsrid_n_u64(a, b, 63); } -// CHECK-LABEL: define <1 x i64>
@test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsri_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -19031,7 +18108,7 @@ uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { return vsri_n_u64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vslid_n_s64( // CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> // CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63) @@ -19041,7 +18118,7 @@ int64_t test_vslid_n_s64(int64_t a, int64_t b) { return (int64_t)vslid_n_s64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsli_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -19052,7 +18129,7 @@ int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { return vsli_n_s64(a, b, 1); } -// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 { +// CHECK-LABEL: @test_vslid_n_u64( // CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> // CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63) @@ -19062,7 +18139,7 @@ uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vslid_n_u64(a, b, 63); } -// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsli_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -19073,7 +18150,7 @@ uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { return vsli_n_u64(a, b, 1); } -// CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshrnh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0 @@ -19082,7 +18159,7 @@ int8_t test_vqshrnh_n_s16(int16_t a) { return (int8_t)vqshrnh_n_s16(a, 8); } -// CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshrns_n_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0 @@ -19091,14 +18168,14 @@ int16_t test_vqshrns_n_s32(int32_t a) { return (int16_t)vqshrns_n_s32(a, 16); } -// CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshrnd_n_s64( // CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQSHRND_N_S64]] int32_t test_vqshrnd_n_s64(int64_t a) { return (int32_t)vqshrnd_n_s64(a, 32); } -// CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshrnh_n_u16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: 
[[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0 @@ -19107,7 +18184,7 @@ uint8_t test_vqshrnh_n_u16(uint16_t a) { return (uint8_t)vqshrnh_n_u16(a, 8); } -// CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshrns_n_u32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0 @@ -19116,14 +18193,14 @@ uint16_t test_vqshrns_n_u32(uint32_t a) { return (uint16_t)vqshrns_n_u32(a, 16); } -// CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshrnd_n_u64( // CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQSHRND_N_U64]] uint32_t test_vqshrnd_n_u64(uint64_t a) { return (uint32_t)vqshrnd_n_u64(a, 32); } -// CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqrshrnh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0 @@ -19132,7 +18209,7 @@ int8_t test_vqrshrnh_n_s16(int16_t a) { return (int8_t)vqrshrnh_n_s16(a, 8); } -// CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqrshrns_n_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0 @@ -19141,14 +18218,14 @@ int16_t test_vqrshrns_n_s32(int32_t a) { return (int16_t)vqrshrns_n_s32(a, 16); } -// CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqrshrnd_n_s64( // CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQRSHRND_N_S64]] int32_t test_vqrshrnd_n_s64(int64_t a) { return (int32_t)vqrshrnd_n_s64(a, 32); } -// CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 { +// CHECK-LABEL: @test_vqrshrnh_n_u16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0 @@ -19157,7 +18234,7 @@ uint8_t test_vqrshrnh_n_u16(uint16_t a) { return (uint8_t)vqrshrnh_n_u16(a, 8); } -// CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vqrshrns_n_u32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0 @@ -19166,14 +18243,14 @@ uint16_t test_vqrshrns_n_u32(uint32_t a) { return (uint16_t)vqrshrns_n_u32(a, 16); } -// CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vqrshrnd_n_u64( // CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQRSHRND_N_U64]] uint32_t test_vqrshrnd_n_u64(uint64_t a) { return (uint32_t)vqrshrnd_n_u64(a, 32); } -// CHECK-LABEL: define i8 
@test_vqshrunh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqshrunh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0 @@ -19182,7 +18259,7 @@ int8_t test_vqshrunh_n_s16(int16_t a) { return (int8_t)vqshrunh_n_s16(a, 8); } -// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqshruns_n_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0 @@ -19191,14 +18268,14 @@ int16_t test_vqshruns_n_s32(int32_t a) { return (int16_t)vqshruns_n_s32(a, 16); } -// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqshrund_n_s64( // CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQSHRUND_N_S64]] int32_t test_vqshrund_n_s64(int64_t a) { return (int32_t)vqshrund_n_s64(a, 32); } -// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 { +// CHECK-LABEL: @test_vqrshrunh_n_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 // CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0 @@ -19207,7 +18284,7 @@ int8_t test_vqrshrunh_n_s16(int16_t a) { return (int8_t)vqrshrunh_n_s16(a, 8); } -// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vqrshruns_n_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 // CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0 @@ -19216,2894 +18293,2855 @@ int16_t test_vqrshruns_n_s32(int32_t a) { return (int16_t)vqrshruns_n_s32(a, 16); } -// CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vqrshrund_n_s64( // CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32) // CHECK: ret i32 [[VQRSHRUND_N_S64]] int32_t test_vqrshrund_n_s64(int64_t a) { return (int32_t)vqrshrund_n_s64(a, 32); } -// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vcvts_n_f32_s32( // CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1) // CHECK: ret float [[VCVTS_N_F32_S32]] float32_t test_vcvts_n_f32_s32(int32_t a) { return vcvts_n_f32_s32(a, 1); } -// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vcvtd_n_f64_s64( // CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1) // CHECK: ret double [[VCVTD_N_F64_S64]] float64_t test_vcvtd_n_f64_s64(int64_t a) { return vcvtd_n_f64_s64(a, 1); } -// CHECK-LABEL: define float @test_vcvts_n_f32_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vcvts_n_f32_u32( // CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32) // CHECK: ret float [[VCVTS_N_F32_U32]] float32_t test_vcvts_n_f32_u32(uint32_t a) { return vcvts_n_f32_u32(a, 32); } -// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vcvtd_n_f64_u64( // CHECK: 
[[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64) // CHECK: ret double [[VCVTD_N_F64_U64]] float64_t test_vcvtd_n_f64_u64(uint64_t a) { return vcvtd_n_f64_u64(a, 64); } -// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 { +// CHECK-LABEL: @test_vcvts_n_s32_f32( // CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1) // CHECK: ret i32 [[VCVTS_N_S32_F32]] int32_t test_vcvts_n_s32_f32(float32_t a) { return (int32_t)vcvts_n_s32_f32(a, 1); } -// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 { +// CHECK-LABEL: @test_vcvtd_n_s64_f64( // CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1) // CHECK: ret i64 [[VCVTD_N_S64_F64]] int64_t test_vcvtd_n_s64_f64(float64_t a) { return (int64_t)vcvtd_n_s64_f64(a, 1); } -// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 { +// CHECK-LABEL: @test_vcvts_n_u32_f32( // CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32) // CHECK: ret i32 [[VCVTS_N_U32_F32]] uint32_t test_vcvts_n_u32_f32(float32_t a) { return (uint32_t)vcvts_n_u32_f32(a, 32); } -// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 { +// CHECK-LABEL: @test_vcvtd_n_u64_f64( // CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64) // CHECK: ret i64 [[VCVTD_N_U64_F64]] uint64_t test_vcvtd_n_u64_f64(float64_t a) { return (uint64_t)vcvtd_n_u64_f64(a, 64); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { return vreinterpret_s8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { return vreinterpret_s8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { return vreinterpret_s8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u8( // CHECK: ret <8 x i8> %a int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { return vreinterpret_s8_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { return vreinterpret_s8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { return vreinterpret_s8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { return vreinterpret_s8_u64(a); } -// CHECK-LABEL: define <8 
x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { return vreinterpret_s8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { return vreinterpret_s8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_f64(float64x1_t a) { return vreinterpret_s8_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_p8( // CHECK: ret <8 x i8> %a int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { return vreinterpret_s8_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { return vreinterpret_s8_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) { return vreinterpret_s8_p64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { return vreinterpret_s16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { return vreinterpret_s16_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { return vreinterpret_s16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { return vreinterpret_s16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u16( // CHECK: ret <4 x i16> %a int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { return vreinterpret_s16_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { return vreinterpret_s16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u64( // 
CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { return vreinterpret_s16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { return vreinterpret_s16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { return vreinterpret_s16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_f64(float64x1_t a) { return vreinterpret_s16_f64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { return vreinterpret_s16_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_p16( // CHECK: ret <4 x i16> %a int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { return vreinterpret_s16_p16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) { return vreinterpret_s16_p64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { return vreinterpret_s32_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { return vreinterpret_s32_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { return vreinterpret_s32_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { return vreinterpret_s32_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { return vreinterpret_s32_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: 
@test_vreinterpret_s32_u32( // CHECK: ret <2 x i32> %a int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { return vreinterpret_s32_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { return vreinterpret_s32_u64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { return vreinterpret_s32_f16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { return vreinterpret_s32_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_f64(float64x1_t a) { return vreinterpret_s32_f64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { return vreinterpret_s32_p8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { return vreinterpret_s32_p16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) { return vreinterpret_s32_p64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { return vreinterpret_s64_s8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { return vreinterpret_s64_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { return vreinterpret_s64_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { return vreinterpret_s64_u8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 { +// 
CHECK-LABEL: @test_vreinterpret_s64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { return vreinterpret_s64_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { return vreinterpret_s64_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u64( // CHECK: ret <1 x i64> %a int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { return vreinterpret_s64_u64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { return vreinterpret_s64_f16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { return vreinterpret_s64_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_f64(float64x1_t a) { return vreinterpret_s64_f64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { return vreinterpret_s64_p8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { return vreinterpret_s64_p16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_p64( // CHECK: ret <1 x i64> %a int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) { return vreinterpret_s64_p64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s8( // CHECK: ret <8 x i8> %a uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { return vreinterpret_u8_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { return vreinterpret_u8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { return vreinterpret_u8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] 
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { return vreinterpret_u8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { return vreinterpret_u8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { return vreinterpret_u8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { return vreinterpret_u8_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { return vreinterpret_u8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { return vreinterpret_u8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) { return vreinterpret_u8_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_p8( // CHECK: ret <8 x i8> %a uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { return vreinterpret_u8_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { return vreinterpret_u8_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) { return vreinterpret_u8_p64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { return vreinterpret_u16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s16( // CHECK: ret <4 x i16> %a uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { return vreinterpret_u16_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { return vreinterpret_u16_s32(a); } -// CHECK-LABEL: define 
<4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { return vreinterpret_u16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { return vreinterpret_u16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { return vreinterpret_u16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { return vreinterpret_u16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { return vreinterpret_u16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { return vreinterpret_u16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) { return vreinterpret_u16_f64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { return vreinterpret_u16_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_p16( // CHECK: ret <4 x i16> %a uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { return vreinterpret_u16_p16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) { return vreinterpret_u16_p64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { return vreinterpret_u32_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { return vreinterpret_u32_s16(a); } -// 
CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s32( // CHECK: ret <2 x i32> %a uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { return vreinterpret_u32_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { return vreinterpret_u32_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { return vreinterpret_u32_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { return vreinterpret_u32_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { return vreinterpret_u32_u64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { return vreinterpret_u32_f16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { return vreinterpret_u32_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) { return vreinterpret_u32_f64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) { return vreinterpret_u32_p8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { return vreinterpret_u32_p16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) { return vreinterpret_u32_p64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { return 
vreinterpret_u64_s8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { return vreinterpret_u64_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { return vreinterpret_u64_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s64( // CHECK: ret <1 x i64> %a uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { return vreinterpret_u64_s64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { return vreinterpret_u64_u8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { return vreinterpret_u64_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { return vreinterpret_u64_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { return vreinterpret_u64_f16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { return vreinterpret_u64_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) { return vreinterpret_u64_f64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { return vreinterpret_u64_p8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { return vreinterpret_u64_p16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_p64( // CHECK: ret <1 x i64> %a uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) { return vreinterpret_u64_p64(a); } -// 
CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s8(int8x8_t a) { return vreinterpret_f16_s8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { return vreinterpret_f16_s16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { return vreinterpret_f16_s32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { return vreinterpret_f16_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { return vreinterpret_f16_u8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { return vreinterpret_f16_u16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { return vreinterpret_f16_u32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { return vreinterpret_f16_u64(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { return vreinterpret_f16_f32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_f64(float64x1_t a) { return vreinterpret_f16_f64(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { return vreinterpret_f16_p8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x 
half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { return vreinterpret_f16_p16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) { return vreinterpret_f16_p64(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { return vreinterpret_f32_s8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { return vreinterpret_f32_s16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { return vreinterpret_f32_s32(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { return vreinterpret_f32_s64(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { return vreinterpret_f32_u8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { return vreinterpret_f32_u16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { return vreinterpret_f32_u32(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { return vreinterpret_f32_u64(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { return vreinterpret_f32_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_f64(float64x1_t a) { return vreinterpret_f32_f64(a); } -// CHECK-LABEL: 
define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { return vreinterpret_f32_p8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { return vreinterpret_f32_p16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) { return vreinterpret_f32_p64(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_s8(int8x8_t a) { return vreinterpret_f64_s8(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_s16(int16x4_t a) { return vreinterpret_f64_s16(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_s32(int32x2_t a) { return vreinterpret_f64_s32(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_s64(int64x1_t a) { return vreinterpret_f64_s64(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) { return vreinterpret_f64_u8(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) { return vreinterpret_f64_u16(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) { return vreinterpret_f64_u32(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) { return vreinterpret_f64_u64(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_f16( // CHECK: [[TMP0:%.*]] 
= bitcast <4 x half> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_f16(float16x4_t a) { return vreinterpret_f64_f16(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_f32(float32x2_t a) { return vreinterpret_f64_f32(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) { return vreinterpret_f64_p8(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) { return vreinterpret_f64_p16(a); } -// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f64_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> // CHECK: ret <1 x double> [[TMP0]] float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) { return vreinterpret_f64_p64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s8( // CHECK: ret <8 x i8> %a poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { return vreinterpret_p8_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) { return vreinterpret_p8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { return vreinterpret_p8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { return vreinterpret_p8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u8( // CHECK: ret <8 x i8> %a poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { return vreinterpret_p8_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { return vreinterpret_p8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { return vreinterpret_p8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> 
[[TMP0]] poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { return vreinterpret_p8_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { return vreinterpret_p8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { return vreinterpret_p8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) { return vreinterpret_p8_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { return vreinterpret_p8_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) { return vreinterpret_p8_p64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { return vreinterpret_p16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s16( // CHECK: ret <4 x i16> %a poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { return vreinterpret_p16_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { return vreinterpret_p16_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) { return vreinterpret_p16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) { return vreinterpret_p16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u16( // CHECK: ret <4 x i16> %a poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) { return vreinterpret_p16_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) { return 
vreinterpret_p16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) { return vreinterpret_p16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) { return vreinterpret_p16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) { return vreinterpret_p16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) { return vreinterpret_p16_f64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) { return vreinterpret_p16_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_p64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) { return vreinterpret_p16_p64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) { return vreinterpret_p64_s8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) { return vreinterpret_p64_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) { return vreinterpret_p64_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_s64( // CHECK: ret <1 x i64> %a poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) { return vreinterpret_p64_s64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) { return vreinterpret_p64_u8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t 
test_vreinterpret_p64_u16(uint16x4_t a) { return vreinterpret_p64_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) { return vreinterpret_p64_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_u64( // CHECK: ret <1 x i64> %a poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) { return vreinterpret_p64_u64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) { return vreinterpret_p64_f16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) { return vreinterpret_p64_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) { return vreinterpret_p64_f64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) { return vreinterpret_p64_p8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) { return vreinterpret_p64_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) { return vreinterpretq_s8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) { return vreinterpretq_s8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) { return vreinterpretq_s8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u8( // CHECK: ret <16 x i8> %a int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) { return vreinterpretq_s8_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) 
{ return vreinterpretq_s8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) { return vreinterpretq_s8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) { return vreinterpretq_s8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) { return vreinterpretq_s8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) { return vreinterpretq_s8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) { return vreinterpretq_s8_f64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_p8( // CHECK: ret <16 x i8> %a int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) { return vreinterpretq_s8_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) { return vreinterpretq_s8_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) { return vreinterpretq_s8_p64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) { return vreinterpretq_s16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) { return vreinterpretq_s16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) { return vreinterpretq_s16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t 
test_vreinterpretq_s16_u8(uint8x16_t a) { return vreinterpretq_s16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u16( // CHECK: ret <8 x i16> %a int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) { return vreinterpretq_s16_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) { return vreinterpretq_s16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) { return vreinterpretq_s16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) { return vreinterpretq_s16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) { return vreinterpretq_s16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) { return vreinterpretq_s16_f64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) { return vreinterpretq_s16_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_p16( // CHECK: ret <8 x i16> %a int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) { return vreinterpretq_s16_p16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) { return vreinterpretq_s16_p64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) { return vreinterpretq_s32_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { return vreinterpretq_s32_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] 
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { return vreinterpretq_s32_s64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { return vreinterpretq_s32_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { return vreinterpretq_s32_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u32( // CHECK: ret <4 x i32> %a int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { return vreinterpretq_s32_u32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { return vreinterpretq_s32_u64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { return vreinterpretq_s32_f16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { return vreinterpretq_s32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) { return vreinterpretq_s32_f64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { return vreinterpretq_s32_p8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { return vreinterpretq_s32_p16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) { return vreinterpretq_s32_p64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { return vreinterpretq_s64_s8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s16( // CHECK: [[TMP0:%.*]] = 
bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) { return vreinterpretq_s64_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { return vreinterpretq_s64_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { return vreinterpretq_s64_u8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { return vreinterpretq_s64_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { return vreinterpretq_s64_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u64( // CHECK: ret <2 x i64> %a int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { return vreinterpretq_s64_u64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) { return vreinterpretq_s64_f16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { return vreinterpretq_s64_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) { return vreinterpretq_s64_f64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { return vreinterpretq_s64_p8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { return vreinterpretq_s64_p16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_p64( // CHECK: ret <2 x i64> %a int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) { return vreinterpretq_s64_p64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s8( // CHECK: ret <16 x i8> 
%a uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { return vreinterpretq_u8_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { return vreinterpretq_u8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { return vreinterpretq_u8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { return vreinterpretq_u8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { return vreinterpretq_u8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { return vreinterpretq_u8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { return vreinterpretq_u8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { return vreinterpretq_u8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { return vreinterpretq_u8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) { return vreinterpretq_u8_f64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_p8( // CHECK: ret <16 x i8> %a uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { return vreinterpretq_u8_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { return vreinterpretq_u8_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x 
i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) { return vreinterpretq_u8_p64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { return vreinterpretq_u16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s16( // CHECK: ret <8 x i16> %a uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { return vreinterpretq_u16_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { return vreinterpretq_u16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { return vreinterpretq_u16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { return vreinterpretq_u16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { return vreinterpretq_u16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { return vreinterpretq_u16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { return vreinterpretq_u16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { return vreinterpretq_u16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) { return vreinterpretq_u16_f64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { return vreinterpretq_u16_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: 
@test_vreinterpretq_u16_p16( // CHECK: ret <8 x i16> %a uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { return vreinterpretq_u16_p16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) { return vreinterpretq_u16_p64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { return vreinterpretq_u32_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { return vreinterpretq_u32_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s32( // CHECK: ret <4 x i32> %a uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { return vreinterpretq_u32_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { return vreinterpretq_u32_s64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { return vreinterpretq_u32_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { return vreinterpretq_u32_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { return vreinterpretq_u32_u64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { return vreinterpretq_u32_f16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { return vreinterpretq_u32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) { return vreinterpretq_u32_f64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 { +// 
CHECK-LABEL: @test_vreinterpretq_u32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { return vreinterpretq_u32_p8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { return vreinterpretq_u32_p16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) { return vreinterpretq_u32_p64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { return vreinterpretq_u64_s8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { return vreinterpretq_u64_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { return vreinterpretq_u64_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s64( // CHECK: ret <2 x i64> %a uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { return vreinterpretq_u64_s64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { return vreinterpretq_u64_u8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { return vreinterpretq_u64_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { return vreinterpretq_u64_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { return vreinterpretq_u64_f16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { return vreinterpretq_u64_f32(a); } -// 
CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) { return vreinterpretq_u64_f64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { return vreinterpretq_u64_p8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { return vreinterpretq_u64_p16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_p64( // CHECK: ret <2 x i64> %a uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) { return vreinterpretq_u64_p64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { return vreinterpretq_f16_s8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { return vreinterpretq_f16_s16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { return vreinterpretq_f16_s32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { return vreinterpretq_f16_s64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) { return vreinterpretq_f16_u8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { return vreinterpretq_f16_u16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { return vreinterpretq_f16_u32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> // CHECK: ret <8 x half> 
[[TMP0]] float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { return vreinterpretq_f16_u64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { return vreinterpretq_f16_f32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) { return vreinterpretq_f16_f64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { return vreinterpretq_f16_p8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { return vreinterpretq_f16_p16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) { return vreinterpretq_f16_p64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { return vreinterpretq_f32_s8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { return vreinterpretq_f32_s16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { return vreinterpretq_f32_s32(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { return vreinterpretq_f32_s64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { return vreinterpretq_f32_u8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { return vreinterpretq_f32_u16(a); } -// 
CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) { return vreinterpretq_f32_u32(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) { return vreinterpretq_f32_u64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) { return vreinterpretq_f32_f16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) { return vreinterpretq_f32_f64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) { return vreinterpretq_f32_p8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) { return vreinterpretq_f32_p16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) { return vreinterpretq_f32_p64(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) { return vreinterpretq_f64_s8(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) { return vreinterpretq_f64_s16(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) { return vreinterpretq_f64_s32(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) { return vreinterpretq_f64_s64(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 { 
+// CHECK-LABEL: @test_vreinterpretq_f64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) { return vreinterpretq_f64_u8(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) { return vreinterpretq_f64_u16(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) { return vreinterpretq_f64_u32(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) { return vreinterpretq_f64_u64(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) { return vreinterpretq_f64_f16(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) { return vreinterpretq_f64_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) { return vreinterpretq_f64_p8(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) { return vreinterpretq_f64_p16(a); } -// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f64_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> // CHECK: ret <2 x double> [[TMP0]] float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) { return vreinterpretq_f64_p64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s8( // CHECK: ret <16 x i8> %a poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { return vreinterpretq_p8_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { return vreinterpretq_p8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t 
test_vreinterpretq_p8_s32(int32x4_t a) { return vreinterpretq_p8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { return vreinterpretq_p8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u8( // CHECK: ret <16 x i8> %a poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { return vreinterpretq_p8_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { return vreinterpretq_p8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { return vreinterpretq_p8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { return vreinterpretq_p8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { return vreinterpretq_p8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) { return vreinterpretq_p8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) { return vreinterpretq_p8_f64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { return vreinterpretq_p8_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) { return vreinterpretq_p8_p64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { return vreinterpretq_p16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s16( // CHECK: ret <8 x i16> %a poly16x8_t 
test_vreinterpretq_p16_s16(int16x8_t a) { return vreinterpretq_p16_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { return vreinterpretq_p16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { return vreinterpretq_p16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { return vreinterpretq_p16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u16( // CHECK: ret <8 x i16> %a poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) { return vreinterpretq_p16_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) { return vreinterpretq_p16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) { return vreinterpretq_p16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) { return vreinterpretq_p16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) { return vreinterpretq_p16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) { return vreinterpretq_p16_f64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) { return vreinterpretq_p16_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_p64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) { return vreinterpretq_p16_p64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_s8( // CHECK: [[TMP0:%.*]] = 
bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) { return vreinterpretq_p64_s8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) { return vreinterpretq_p64_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) { return vreinterpretq_p64_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_s64( // CHECK: ret <2 x i64> %a poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) { return vreinterpretq_p64_s64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) { return vreinterpretq_p64_u8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) { return vreinterpretq_p64_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) { return vreinterpretq_p64_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_u64( // CHECK: ret <2 x i64> %a poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) { return vreinterpretq_p64_u64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) { return vreinterpretq_p64_f16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) { return vreinterpretq_p64_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) { return vreinterpretq_p64_f64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) { return vreinterpretq_p64_p8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p64_p16( // CHECK: 
[[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
-// CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
+// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}
-// CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
+// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
-// CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
+// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}
-// CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}
-// CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsqadd_u8(
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}
-// CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}
-// CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}
-// CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}
-// CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}
-// CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}
-// CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
+// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}
-// CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
+// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a) #4
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
+// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK: ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vqneg_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
+// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK: ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vneg_s64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}
-// CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vaddv_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a) #4
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}
-// CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vaddvq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
+// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a) #4
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}
-// CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vaddvq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a) #4
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}
-// CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vmaxv_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) #4
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}
-// CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) #4
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}
-// CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vminv_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) #4
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}
-// CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vminvq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) #4
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}
-// CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) #4
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}
-// CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) #4
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}
-// CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) #4
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}
-// CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vminnmv_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) #4
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vpaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}
-// CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vpaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}
-// CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vpaddd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
+// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}
-// CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vaddvq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
+// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a) #4
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}
-// CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vaddvq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
+// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vadd_f64(
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vmul_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK: ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vdiv_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK: ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
+// CHECK-LABEL: @test_vmla_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[ADD_I]]
@@ -22111,7 +21149,7 @@ float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}
-// CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
+// CHECK-LABEL: @test_vmls_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[SUB_I]]
@@ -22119,220 +21157,191 @@ float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}
-// CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
+// CHECK-LABEL: @test_vfma_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
-// CHECK: ret <1 x double> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) #4
+// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}
-// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
+// CHECK-LABEL: @test_vfms_f64(
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
-// CHECK: ret <1 x double> [[TMP6]]
+// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a) #4
+// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}
-// CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vsub_f64(
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vabd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4
+// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vmax_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4
+// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vmin_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4
+// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vmaxnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4
+// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vminnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4
+// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vabs_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4
+// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vneg_f64(
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
+// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
+// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
+// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
+// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
+// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
+// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvta_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
+// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvta_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
+// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
+// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
+// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvt_n_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
@@ -22341,7 +21350,7 @@ int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}
-// CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvt_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
@@ -22350,7 +21359,7 @@ uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}
-// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvt_n_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
@@ -22359,7 +21368,7 @@ float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}
-// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvt_n_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
@@ -22368,189 +21377,164 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}
-// CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4
+// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrnda_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4
+// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndp_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4
+// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]]) #4
+// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndx_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4
+// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrnd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4
+// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndi_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4
+// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrsqrte_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4
+// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vrecpe_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4
+// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vsqrt_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4
+// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) #4
// CHECK: ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}
-// CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vrecps_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4
-// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP2]]
+// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b) #4
+// CHECK: ret <1 x double> [[VRECPS_V2_I]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}
-// CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK-LABEL: @test_vrsqrts_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4
+// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP2]]
+// CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}
-// CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vminv_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}
-// CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vminv_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}
-// CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmaxv_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}
-// CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vmaxv_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}
-// CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vaddv_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}
-// CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vaddv_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a) #4
// CHECK: ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}
-// CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vaddlv_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a) #4
// CHECK: ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}
-// CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vaddlv_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
+// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a) #4
// CHECK: ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
index 4ecf562a5d29..1342bbb0c8cb 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -6,7 +6,7 @@
#include <arm_neon.h>
-// CHECK-LABEL: define <8 x i8> @test_vceqz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s8(
// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: ret <8 x i8> [[VCEQZ_I]]
@@ -14,57 +14,52 @@ uint8x8_t test_vceqz_s8(int8x8_t a) {
  return vceqz_s8(a);
}
-// CHECK-LABEL: define <4 x i16> @test_vceqz_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VCEQZ_I]]
uint16x4_t test_vceqz_s16(int16x4_t a) {
  return vceqz_s16(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vceqz_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCEQZ_I]]
uint32x2_t test_vceqz_s32(int32x2_t a) {
  return vceqz_s32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCEQZ_I]]
uint64x1_t test_vceqz_s64(int64x1_t a) {
  return vceqz_s64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCEQZ_I]]
uint64x1_t test_vceqz_u64(uint64x1_t a) {
  return vceqz_u64(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCEQZ_I]]
uint64x1_t test_vceqz_p64(poly64x1_t a) {
  return vceqz_p64(a);
}
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s8(
// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
// CHECK: ret <16 x i8> [[VCEQZ_I]]
@@ -72,37 +67,34 @@ uint8x16_t test_vceqzq_s8(int8x16_t a) {
  return vceqzq_s8(a);
}
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[VCEQZ_I]]
uint16x8_t test_vceqzq_s16(int16x8_t a) {
  return vceqzq_s16(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCEQZ_I]]
uint32x4_t test_vceqzq_s32(int32x4_t a) {
  return vceqzq_s32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCEQZ_I]]
uint64x2_t test_vceqzq_s64(int64x2_t a) {
  return vceqzq_s64(a);
}
-// CHECK-LABEL: define <8 x i8> @test_vceqz_u8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u8(
// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: ret <8 x i8> [[VCEQZ_I]]
@@ -110,27 +102,25 @@ uint8x8_t test_vceqz_u8(uint8x8_t a) {
  return vceqz_u8(a);
}
-// CHECK-LABEL: define <4 x i16> @test_vceqz_u16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VCEQZ_I]]
uint16x4_t test_vceqz_u16(uint16x4_t a) {
  return vceqz_u16(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vceqz_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqz_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCEQZ_I]]
uint32x2_t test_vceqz_u32(uint32x2_t a) {
  return vceqz_u32(a);
}
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_u8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u8(
// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
// CHECK: ret <16 x i8> [[VCEQZ_I]]
@@ -138,67 +128,61 @@ uint8x16_t test_vceqzq_u8(uint8x16_t a) {
  return vceqzq_u8(a);
}
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_u16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[VCEQZ_I]]
uint16x8_t test_vceqzq_u16(uint16x8_t a) {
  return vceqzq_u16(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCEQZ_I]]
uint32x4_t test_vceqzq_u32(uint32x4_t a) {
  return vceqzq_u32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCEQZ_I]]
uint64x2_t test_vceqzq_u64(uint64x2_t a) {
  return vceqzq_u64(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vceqz_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vceqz_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP2:%.*]] = fcmp oeq <2 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp oeq <2 x float> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCEQZ_I]]
uint32x2_t test_vceqz_f32(float32x2_t a) {
  return vceqz_f32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vceqz_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP2:%.*]] = fcmp oeq <1 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp oeq <1 x double> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCEQZ_I]]
uint64x1_t test_vceqz_f64(float64x1_t a) {
  return vceqz_f64(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vceqzq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP2:%.*]] = fcmp oeq <4 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x float> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCEQZ_I]]
uint32x4_t test_vceqzq_f32(float32x4_t a) {
  return vceqzq_f32(a);
}
-// CHECK-LABEL: define <8 x i8> @test_vceqz_p8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p8(
// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: ret <8 x i8> [[VCEQZ_I]]
@@ -206,7 +190,7 @@ uint8x8_t test_vceqz_p8(poly8x8_t a) {
  return vceqz_p8(a);
}
-// CHECK-LABEL: define <16 x i8> @test_vceqzq_p8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p8(
// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
// CHECK: ret <16 x i8> [[VCEQZ_I]]
@@ -214,47 +198,43 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a) {
  return vceqzq_p8(a);
}
-// CHECK-LABEL: define <4 x i16> @test_vceqz_p16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqz_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VCEQZ_I]]
uint16x4_t test_vceqz_p16(poly16x4_t a) {
  return vceqz_p16(a);
}
-// CHECK-LABEL: define <8 x i16> @test_vceqzq_p16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK: [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[VCEQZ_I]]
uint16x8_t test_vceqzq_p16(poly16x8_t a) {
  return vceqzq_p16(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP2:%.*]] = fcmp oeq <2 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp oeq <2 x double> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCEQZ_I]]
uint64x2_t test_vceqzq_f64(float64x2_t a) {
  return vceqzq_f64(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vceqzq_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
+// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCEQZ_I]]
uint64x2_t test_vceqzq_p64(poly64x2_t a) {
  return vceqzq_p64(a);
}
-// CHECK-LABEL: define <8 x i8> @test_vcgez_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s8(
// CHECK: [[TMP0:%.*]] = icmp sge <8 x i8> %a, zeroinitializer
// CHECK: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: ret <8 x i8> [[VCGEZ_I]]
@@ -262,37 +242,34 @@ uint8x8_t test_vcgez_s8(int8x8_t a) {
  return vcgez_s8(a);
}
-// CHECK-LABEL: define <4 x i16> @test_vcgez_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = icmp sge <4 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK: [[TMP1:%.*]] = icmp sge <4 x i16> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VCGEZ_I]]
uint16x4_t test_vcgez_s16(int16x4_t a) {
  return vcgez_s16(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vcgez_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = icmp sge <2 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = icmp sge <2 x i32> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCGEZ_I]]
uint32x2_t test_vcgez_s32(int32x2_t a) {
  return vcgez_s32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgez_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP2:%.*]] = icmp sge <1 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = icmp sge <1 x i64> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCGEZ_I]]
uint64x1_t test_vcgez_s64(int64x1_t a) {
  return vcgez_s64(a);
}
-// CHECK-LABEL: define <16 x i8> @test_vcgezq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s8(
// CHECK: [[TMP0:%.*]] = icmp sge <16 x i8> %a, zeroinitializer
// CHECK: [[VCGEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
// CHECK: ret <16 x i8> [[VCGEZ_I]]
@@ -300,77 +277,70 @@ uint8x16_t test_vcgezq_s8(int8x16_t a) {
  return vcgezq_s8(a);
}
-// CHECK-LABEL: define <8 x i16> @test_vcgezq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK: [[TMP1:%.*]] = icmp sge <8 x i16> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[VCGEZ_I]]
uint16x8_t test_vcgezq_s16(int16x8_t a) {
  return vcgezq_s16(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vcgezq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = icmp sge <4 x i32> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCGEZ_I]]
uint32x4_t test_vcgezq_s32(int32x4_t a) {
  return vcgezq_s32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vcgezq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = icmp sge <2 x i64> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCGEZ_I]]
uint64x2_t test_vcgezq_s64(int64x2_t a) {
  return vcgezq_s64(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vcgez_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgez_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP2:%.*]] = fcmp oge <2 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp oge <2 x float> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCGEZ_I]]
uint32x2_t test_vcgez_f32(float32x2_t a) {
  return vcgez_f32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vcgez_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgez_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP2:%.*]] = fcmp oge <1 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp oge <1 x double> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCGEZ_I]]
uint64x1_t test_vcgez_f64(float64x1_t a) {
  return vcgez_f64(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vcgezq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP2:%.*]] = fcmp oge <4 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp oge <4 x float> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCGEZ_I]]
uint32x4_t test_vcgezq_f32(float32x4_t a) {
  return vcgezq_f32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vcgezq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcgezq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP2:%.*]] = fcmp oge <2 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp oge <2 x double> %a, zeroinitializer
+// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCGEZ_I]]
uint64x2_t test_vcgezq_f64(float64x2_t a) {
  return vcgezq_f64(a);
}
-// CHECK-LABEL: define <8 x i8> @test_vclez_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclez_s8(
// CHECK: [[TMP0:%.*]] = icmp sle <8 x i8> %a, zeroinitializer
// CHECK: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: ret <8 x i8> [[VCLEZ_I]]
@@ -378,37 +348,34 @@ uint8x8_t test_vclez_s8(int8x8_t a) {
  return vclez_s8(a);
}
-// CHECK-LABEL: define <4 x i16> @test_vclez_s16(<4 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclez_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = icmp sle <4 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK: [[TMP1:%.*]] = icmp sle <4 x i16> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VCLEZ_I]]
uint16x4_t test_vclez_s16(int16x4_t a) {
  return vclez_s16(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vclez_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclez_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = icmp sle <2 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = icmp sle <2 x i32> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCLEZ_I]]
uint32x2_t test_vclez_s32(int32x2_t a) {
  return vclez_s32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
+// CHECK-LABEL: @test_vclez_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP2:%.*]] = icmp sle <1 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = icmp sle <1 x i64> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCLEZ_I]]
uint64x1_t test_vclez_s64(int64x1_t a) {
  return vclez_s64(a);
}
-// CHECK-LABEL: define <16 x i8> @test_vclezq_s8(<16 x i8> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s8(
// CHECK: [[TMP0:%.*]] = icmp sle <16 x i8> %a, zeroinitializer
// CHECK: [[VCLEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
// CHECK: ret <16 x i8> [[VCLEZ_I]]
@@ -416,77 +383,70 @@ uint8x16_t test_vclezq_s8(int8x16_t a) {
  return vclezq_s8(a);
}
-// CHECK-LABEL: define <8 x i16> @test_vclezq_s16(<8 x i16> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK: [[TMP1:%.*]] = icmp sle <8 x i16> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
// CHECK: ret <8 x i16> [[VCLEZ_I]]
uint16x8_t test_vclezq_s16(int16x8_t a) {
  return vclezq_s16(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vclezq_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = icmp sle <4 x i32> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCLEZ_I]]
uint32x4_t test_vclezq_s32(int32x4_t a) {
  return vclezq_s32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vclezq_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vclezq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = icmp sle <2 x i64> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCLEZ_I]]
uint64x2_t test_vclezq_s64(int64x2_t a) {
  return vclezq_s64(a);
}
-// CHECK-LABEL: define <2 x i32> @test_vclez_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vclez_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP2:%.*]] = fcmp ole <2 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp ole <2 x float> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCLEZ_I]]
uint32x2_t test_vclez_f32(float32x2_t a) {
  return vclez_f32(a);
}
-// CHECK-LABEL: define <1 x i64> @test_vclez_f64(<1 x double> %a) #0 {
+// CHECK-LABEL: @test_vclez_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP2:%.*]] = fcmp ole <1 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp ole <1 x double> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[VCLEZ_I]]
uint64x1_t test_vclez_f64(float64x1_t a) {
  return vclez_f64(a);
}
-// CHECK-LABEL: define <4 x i32> @test_vclezq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vclezq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP2:%.*]] = fcmp ole <4 x float> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK: [[TMP1:%.*]] = fcmp ole <4 x float> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCLEZ_I]]
uint32x4_t test_vclezq_f32(float32x4_t a) {
  return vclezq_f32(a);
}
-// CHECK-LABEL: define <2 x i64> @test_vclezq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vclezq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP2:%.*]] = fcmp ole <2 x double> [[TMP1]], zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK: [[TMP1:%.*]] = fcmp ole <2 x double> %a, zeroinitializer
+// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VCLEZ_I]]
uint64x2_t test_vclezq_f64(float64x2_t a) {
  return vclezq_f64(a);
}
-// CHECK-LABEL: define <8 x i8> @test_vcgtz_s8(<8 x i8> %a) #0 {
+// CHECK-LABEL: @test_vcgtz_s8(
// CHECK: [[TMP0:%.*]] = icmp sgt <8 x
i8> %a, zeroinitializer // CHECK: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: ret <8 x i8> [[VCGTZ_I]] @@ -494,37 +454,34 @@ uint8x8_t test_vcgtz_s8(int8x8_t a) { return vcgtz_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vcgtz_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vcgtz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = icmp sgt <4 x i16> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> +// CHECK: [[TMP1:%.*]] = icmp sgt <4 x i16> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16> // CHECK: ret <4 x i16> [[VCGTZ_I]] uint16x4_t test_vcgtz_s16(int16x4_t a) { return vcgtz_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vcgtz_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcgtz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// CHECK: [[TMP1:%.*]] = icmp sgt <2 x i32> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> // CHECK: ret <2 x i32> [[VCGTZ_I]] uint32x2_t test_vcgtz_s32(int32x2_t a) { return vcgtz_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vcgtz_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP2:%.*]] = icmp sgt <1 x i64> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// CHECK: [[TMP1:%.*]] = icmp sgt <1 x i64> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64> // CHECK: ret <1 x i64> [[VCGTZ_I]] uint64x1_t test_vcgtz_s64(int64x1_t a) { return vcgtz_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vcgtzq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_s8( // CHECK: [[TMP0:%.*]] = icmp sgt <16 x i8> %a, zeroinitializer // CHECK: [[VCGTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> // CHECK: ret <16 x i8> [[VCGTZ_I]] @@ -532,77 +489,70 @@ uint8x16_t test_vcgtzq_s8(int8x16_t a) { return vcgtzq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vcgtzq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> +// CHECK: [[TMP1:%.*]] = icmp sgt <8 x i16> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> // CHECK: ret <8 x i16> [[VCGTZ_I]] uint16x8_t test_vcgtzq_s16(int16x8_t a) { return vcgtzq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vcgtzq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = icmp sgt <4 x i32> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> // CHECK: ret <4 x i32> [[VCGTZ_I]] uint32x4_t test_vcgtzq_s32(int32x4_t a) { return vcgtzq_s32(a); } -// CHECK-LABEL: define 
<2 x i64> @test_vcgtzq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = icmp sgt <2 x i64> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> // CHECK: ret <2 x i64> [[VCGTZ_I]] uint64x2_t test_vcgtzq_s64(int64x2_t a) { return vcgtzq_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vcgtz_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcgtz_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[TMP2:%.*]] = fcmp ogt <2 x float> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// CHECK: [[TMP1:%.*]] = fcmp ogt <2 x float> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> // CHECK: ret <2 x i32> [[VCGTZ_I]] uint32x2_t test_vcgtz_f32(float32x2_t a) { return vcgtz_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vcgtz_f64(<1 x double> %a) #0 { +// CHECK-LABEL: @test_vcgtz_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[TMP2:%.*]] = fcmp ogt <1 x double> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// CHECK: [[TMP1:%.*]] = fcmp ogt <1 x double> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64> // CHECK: ret <1 x i64> [[VCGTZ_I]] uint64x1_t test_vcgtz_f64(float64x1_t a) { return vcgtz_f64(a); } -// CHECK-LABEL: define <4 x i32> @test_vcgtzq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[TMP2:%.*]] = fcmp ogt <4 x float> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x float> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> // CHECK: ret <4 x i32> [[VCGTZ_I]] uint32x4_t test_vcgtzq_f32(float32x4_t a) { return vcgtzq_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vcgtzq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcgtzq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[TMP2:%.*]] = fcmp ogt <2 x double> [[TMP1]], zeroinitializer -// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = fcmp ogt <2 x double> %a, zeroinitializer +// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> // CHECK: ret <2 x i64> [[VCGTZ_I]] uint64x2_t test_vcgtzq_f64(float64x2_t a) { return vcgtzq_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vcltz_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcltz_s8( // CHECK: [[TMP0:%.*]] = icmp slt <8 x i8> %a, zeroinitializer // CHECK: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK: ret <8 x i8> [[VCLTZ_I]] @@ -610,37 +560,34 @@ uint8x8_t test_vcltz_s8(int8x8_t a) { return vcltz_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vcltz_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vcltz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x 
i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = icmp slt <4 x i16> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16> +// CHECK: [[TMP1:%.*]] = icmp slt <4 x i16> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16> // CHECK: ret <4 x i16> [[VCLTZ_I]] uint16x4_t test_vcltz_s16(int16x4_t a) { return vcltz_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vcltz_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcltz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// CHECK: [[TMP1:%.*]] = icmp slt <2 x i32> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> // CHECK: ret <2 x i32> [[VCLTZ_I]] uint32x2_t test_vcltz_s32(int32x2_t a) { return vcltz_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vcltz_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP2:%.*]] = icmp slt <1 x i64> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// CHECK: [[TMP1:%.*]] = icmp slt <1 x i64> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64> // CHECK: ret <1 x i64> [[VCLTZ_I]] uint64x1_t test_vcltz_s64(int64x1_t a) { return vcltz_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vcltzq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcltzq_s8( // CHECK: [[TMP0:%.*]] = icmp slt <16 x i8> %a, zeroinitializer // CHECK: [[VCLTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8> // CHECK: ret <16 x i8> [[VCLTZ_I]] @@ -648,1593 +595,1454 @@ uint8x16_t test_vcltzq_s8(int8x16_t a) { return vcltzq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vcltzq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vcltzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16> +// CHECK: [[TMP1:%.*]] = icmp slt <8 x i16> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> // CHECK: ret <8 x i16> [[VCLTZ_I]] uint16x8_t test_vcltzq_s16(int16x8_t a) { return vcltzq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vcltzq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcltzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = icmp slt <4 x i32> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> // CHECK: ret <4 x i32> [[VCLTZ_I]] uint32x4_t test_vcltzq_s32(int32x4_t a) { return vcltzq_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vcltzq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vcltzq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = icmp slt <2 x i64> %a, 
zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> // CHECK: ret <2 x i64> [[VCLTZ_I]] uint64x2_t test_vcltzq_s64(int64x2_t a) { return vcltzq_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vcltz_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcltz_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[TMP2:%.*]] = fcmp olt <2 x float> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +// CHECK: [[TMP1:%.*]] = fcmp olt <2 x float> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> // CHECK: ret <2 x i32> [[VCLTZ_I]] uint32x2_t test_vcltz_f32(float32x2_t a) { return vcltz_f32(a); } - -// CHECK-LABEL: define <1 x i64> @test_vcltz_f64(<1 x double> %a) #0 { + +// CHECK-LABEL: @test_vcltz_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[TMP2:%.*]] = fcmp olt <1 x double> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64> +// CHECK: [[TMP1:%.*]] = fcmp olt <1 x double> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64> // CHECK: ret <1 x i64> [[VCLTZ_I]] uint64x1_t test_vcltz_f64(float64x1_t a) { return vcltz_f64(a); } -// CHECK-LABEL: define <4 x i32> @test_vcltzq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcltzq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[TMP2:%.*]] = fcmp olt <4 x float> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> +// CHECK: [[TMP1:%.*]] = fcmp olt <4 x float> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> // CHECK: ret <4 x i32> [[VCLTZ_I]] uint32x4_t test_vcltzq_f32(float32x4_t a) { return vcltzq_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vcltzq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcltzq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer -// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64> +// CHECK: [[TMP1:%.*]] = fcmp olt <2 x double> %a, zeroinitializer +// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> // CHECK: ret <2 x i64> [[VCLTZ_I]] uint64x2_t test_vcltzq_f64(float64x2_t a) { return vcltzq_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev16_s8(int8x8_t a) { return vrev16_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev16_u8(uint8x8_t a) { return vrev16_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev16_p8(poly8x8_t a) { return vrev16_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: 
@test_vrev16q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vrev16q_s8(int8x16_t a) { return vrev16q_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev16q_u8(uint8x16_t a) { return vrev16q_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev16q_p8(poly8x16_t a) { return vrev16q_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev32_s8(int8x8_t a) { return vrev32_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vrev32_s16(int16x4_t a) { return vrev32_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev32_u8(uint8x8_t a) { return vrev32_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vrev32_u16(uint16x4_t a) { return vrev32_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev32_p8(poly8x8_t a) { return vrev32_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vrev32_p16(poly16x4_t a) { return vrev32_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vrev32q_s8(int8x16_t a) { return vrev32q_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vrev32q_s16(int16x8_t a) { return vrev32q_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev32q_u8(uint8x16_t a) { return vrev32q_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x 
i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vrev32q_u16(uint16x8_t a) { return vrev32q_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev32q_p8(poly8x16_t a) { return vrev32q_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vrev32q_p16(poly16x8_t a) { return vrev32q_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev64_s8(int8x8_t a) { return vrev64_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vrev64_s16(int16x4_t a) { return vrev64_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] int32x2_t test_vrev64_s32(int32x2_t a) { return vrev64_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev64_u8(uint8x8_t a) { return vrev64_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vrev64_u16(uint16x4_t a) { return vrev64_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vrev64_u32(uint32x2_t a) { return vrev64_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev64_p8(poly8x8_t a) { return vrev64_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vrev64_p16(poly16x4_t a) { return vrev64_p16(a); } -// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrev64_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vrev64_f32(float32x2_t a) { return vrev64_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t 
test_vrev64q_s8(int8x16_t a) { return vrev64q_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vrev64q_s16(int16x8_t a) { return vrev64q_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] int32x4_t test_vrev64q_s32(int32x4_t a) { return vrev64q_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev64q_u8(uint8x16_t a) { return vrev64q_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vrev64q_u16(uint16x8_t a) { return vrev64q_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] uint32x4_t test_vrev64q_u32(uint32x4_t a) { return vrev64q_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev64q_p8(poly8x16_t a) { return vrev64q_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vrev64q_p16(poly16x8_t a) { return vrev64q_p16(a); } -// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrev64q_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I]] float32x4_t test_vrev64q_f32(float32x4_t a) { return vrev64q_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s8( // CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #2 // CHECK: ret <4 x i16> [[VPADDL_I]] int16x4_t test_vpaddl_s8(int8x8_t a) { return vpaddl_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) #2 // CHECK: ret <2 x i32> [[VPADDL1_I]] int32x2_t test_vpaddl_s16(int16x4_t a) { return vpaddl_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> 
@llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) #2 // CHECK: ret <1 x i64> [[VPADDL1_I]] int64x1_t test_vpaddl_s32(int32x2_t a) { return vpaddl_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u8( // CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #2 // CHECK: ret <4 x i16> [[VPADDL_I]] uint16x4_t test_vpaddl_u8(uint8x8_t a) { return vpaddl_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) #2 // CHECK: ret <2 x i32> [[VPADDL1_I]] uint32x2_t test_vpaddl_u16(uint16x4_t a) { return vpaddl_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) #2 // CHECK: ret <1 x i64> [[VPADDL1_I]] uint64x1_t test_vpaddl_u32(uint32x2_t a) { return vpaddl_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s8( // CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #2 // CHECK: ret <8 x i16> [[VPADDL_I]] int16x8_t test_vpaddlq_s8(int8x16_t a) { return vpaddlq_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) #2 // CHECK: ret <4 x i32> [[VPADDL1_I]] int32x4_t test_vpaddlq_s16(int16x8_t a) { return vpaddlq_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) #2 // CHECK: ret <2 x i64> [[VPADDL1_I]] int64x2_t test_vpaddlq_s32(int32x4_t a) { return vpaddlq_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u8( // CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #2 // CHECK: ret <8 x i16> [[VPADDL_I]] uint16x8_t test_vpaddlq_u8(uint8x16_t a) { return vpaddlq_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 
-// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) #2 // CHECK: ret <4 x i32> [[VPADDL1_I]] uint32x4_t test_vpaddlq_u16(uint16x8_t a) { return vpaddlq_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) #2 // CHECK: ret <2 x i64> [[VPADDL1_I]] uint64x2_t test_vpaddlq_u32(uint32x4_t a) { return vpaddlq_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadal_s8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #2 -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]] -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a +// CHECK: ret <4 x i16> [[TMP1]] int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { return vpadal_s8(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadal_s16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <2 x i32> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) #2 +// CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a +// CHECK: ret <2 x i32> [[TMP2]] int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { return vpadal_s16(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadal_s32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <1 x i64> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) #2 +// CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a +// CHECK: ret <1 x i64> [[TMP2]] int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { return vpadal_s32(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadal_u8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #2 -// CHECK: [[TMP1:%.*]] = bitcast 
<8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]] -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a +// CHECK: ret <4 x i16> [[TMP1]] uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { return vpadal_u8(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadal_u16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <2 x i32> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) #2 +// CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a +// CHECK: ret <2 x i32> [[TMP2]] uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { return vpadal_u16(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadal_u32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <1 x i64> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) #2 +// CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a +// CHECK: ret <1 x i64> [[TMP2]] uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { return vpadal_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #2 -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]] -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a +// CHECK: ret <8 x i16> [[TMP1]] int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { return vpadalq_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <4 x i32> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) #2 +// CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a +// CHECK: ret <4 x i32> [[TMP2]] int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { return vpadalq_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x 
i32> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <2 x i64> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) #2 +// CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a +// CHECK: ret <2 x i64> [[TMP2]] int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { return vpadalq_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #2 -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]] -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a +// CHECK: ret <8 x i16> [[TMP1]] uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { return vpadalq_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <4 x i32> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) #2 +// CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a +// CHECK: ret <4 x i32> [[TMP2]] uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { return vpadalq_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2 -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]] -// CHECK: ret <2 x i64> [[TMP3]] +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) #2 +// CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a +// CHECK: ret <2 x i64> [[TMP2]] uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { return vpadalq_u32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqabs_s8( // CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VQABS_V_I]] int8x8_t test_vqabs_s8(int8x8_t a) { return vqabs_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s8( // CHECK: [[VQABSQ_V_I:%.*]] = call 
<16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VQABSQ_V_I]] int8x16_t test_vqabsq_s8(int8x16_t a) { return vqabsq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #2 +// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %a) #2 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQABS_V1_I]] int16x4_t test_vqabs_s16(int16x4_t a) { return vqabs_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %a) #2 // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VQABSQ_V1_I]] int16x8_t test_vqabsq_s16(int16x8_t a) { return vqabsq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #2 +// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %a) #2 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQABS_V1_I]] int32x2_t test_vqabs_s32(int32x2_t a) { return vqabs_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %a) #2 // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VQABSQ_V1_I]] int32x4_t test_vqabsq_s32(int32x4_t a) { return vqabsq_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> [[VQABSQ_V_I]]) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> %a) #2 // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> 
[[VQABSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP1]] +// CHECK: ret <2 x i64> [[VQABSQ_V1_I]] int64x2_t test_vqabsq_s64(int64x2_t a) { return vqabsq_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqneg_s8( // CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VQNEG_V_I]] int8x8_t test_vqneg_s8(int8x8_t a) { return vqneg_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s8( // CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VQNEGQ_V_I]] int8x16_t test_vqnegq_s8(int8x16_t a) { return vqnegq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqneg_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #2 +// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %a) #2 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQNEG_V1_I]] int16x4_t test_vqneg_s16(int16x4_t a) { return vqneg_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %a) #2 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]] int16x8_t test_vqnegq_s16(int16x8_t a) { return vqnegq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqneg_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #2 +// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %a) #2 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQNEG_V1_I]] int32x2_t test_vqneg_s32(int32x2_t a) { return vqneg_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %a) #2 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> 
[[VQNEGQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]] int32x4_t test_vqnegq_s32(int32x4_t a) { return vqnegq_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> [[VQNEGQ_V_I]]) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> %a) #2 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP1]] +// CHECK: ret <2 x i64> [[VQNEGQ_V1_I]] int64x2_t test_vqnegq_s64(int64x2_t a) { return vqnegq_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vneg_s8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a // CHECK: ret <8 x i8> [[SUB_I]] int8x8_t test_vneg_s8(int8x8_t a) { return vneg_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vnegq_s8( // CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a // CHECK: ret <16 x i8> [[SUB_I]] int8x16_t test_vnegq_s8(int8x16_t a) { return vnegq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vneg_s16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a // CHECK: ret <4 x i16> [[SUB_I]] int16x4_t test_vneg_s16(int16x4_t a) { return vneg_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vnegq_s16( // CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vnegq_s16(int16x8_t a) { return vnegq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vneg_s32( // CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a // CHECK: ret <2 x i32> [[SUB_I]] int32x2_t test_vneg_s32(int32x2_t a) { return vneg_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vnegq_s32( // CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vnegq_s32(int32x4_t a) { return vnegq_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vnegq_s64( // CHECK: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, %a // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vnegq_s64(int64x2_t a) { return vnegq_s64(a); } -// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vneg_f32( // CHECK: [[SUB_I:%.*]] = fsub <2 x float> , %a // CHECK: ret <2 x float> [[SUB_I]] float32x2_t test_vneg_f32(float32x2_t a) { return vneg_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vnegq_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %a // CHECK: ret <4 x float> [[SUB_I]] float32x4_t test_vnegq_f32(float32x4_t a) { return vnegq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vnegq_f64( // CHECK: [[SUB_I:%.*]] = fsub <2 x double> , %a // CHECK: ret <2 x double> [[SUB_I]] float64x2_t test_vnegq_f64(float64x2_t a) { return vnegq_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: 
@test_vabs_s8( // CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VABS_I]] int8x8_t test_vabs_s8(int8x8_t a) { return vabs_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vabsq_s8( // CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VABS_I]] int8x16_t test_vabsq_s8(int8x16_t a) { return vabsq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %a) #2 // CHECK: ret <4 x i16> [[VABS1_I]] int16x4_t test_vabs_s16(int16x4_t a) { return vabs_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %a) #2 // CHECK: ret <8 x i16> [[VABS1_I]] int16x8_t test_vabsq_s16(int16x8_t a) { return vabsq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %a) #2 // CHECK: ret <2 x i32> [[VABS1_I]] int32x2_t test_vabs_s32(int32x2_t a) { return vabs_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %a) #2 // CHECK: ret <4 x i32> [[VABS1_I]] int32x4_t test_vabsq_s32(int32x4_t a) { return vabsq_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vabsq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> %a) #2 // CHECK: ret <2 x i64> [[VABS1_I]] int64x2_t test_vabsq_s64(int64x2_t a) { return vabsq_s64(a); } -// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vabs_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x float> [[VABS1_I]] float32x2_t test_vabs_f32(float32x2_t a) { return 
vabs_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vabsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x float> [[VABS1_I]] float32x4_t test_vabsq_f32(float32x4_t a) { return vabsq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vabsq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vabsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[VABS_I]]) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #2 // CHECK: ret <2 x double> [[VABS1_I]] float64x2_t test_vabsq_f64(float64x2_t a) { return vabsq_f64(a); } -// CHECK-LABEL: define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vuqadd_s8( // CHECK: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VUQADD_I]] int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) { return vuqadd_s8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vuqaddq_s8( // CHECK: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VUQADD_I]] int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) { return vuqaddq_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vuqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[VUQADD_I]], <4 x i16> [[VUQADD1_I]]) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #2 // CHECK: ret <4 x i16> [[VUQADD2_I]] int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) { return vuqadd_s16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vuqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> [[VUQADD_I]], <8 x i16> [[VUQADD1_I]]) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #2 // CHECK: ret <8 x i16> [[VUQADD2_I]] int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) { return vuqaddq_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vuqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> 
[[TMP1]] to <2 x i32> -// CHECK: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> [[VUQADD_I]], <2 x i32> [[VUQADD1_I]]) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #2 // CHECK: ret <2 x i32> [[VUQADD2_I]] int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) { return vuqadd_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vuqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> [[VUQADD_I]], <4 x i32> [[VUQADD1_I]]) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #2 // CHECK: ret <4 x i32> [[VUQADD2_I]] int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) { return vuqaddq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vuqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> [[VUQADD_I]], <2 x i64> [[VUQADD1_I]]) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #2 // CHECK: ret <2 x i64> [[VUQADD2_I]] int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) { return vuqaddq_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcls_s8( // CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VCLS_V_I]] int8x8_t test_vcls_s8(int8x8_t a) { return vcls_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclsq_s8( // CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VCLSQ_V_I]] int8x16_t test_vclsq_s8(int8x16_t a) { return vclsq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vcls_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]]) #2 +// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a) #2 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLS_V1_I]] int16x4_t test_vcls_s16(int16x4_t a) { return vcls_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #2 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a) 
#2 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLSQ_V1_I]] int16x8_t test_vclsq_s16(int16x8_t a) { return vclsq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcls_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]]) #2 +// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a) #2 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLS_V1_I]] int32x2_t test_vcls_s32(int32x2_t a) { return vcls_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #2 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a) #2 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLSQ_V1_I]] int32x4_t test_vclsq_s32(int32x4_t a) { return vclsq_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vclz_s8( // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2 // CHECK: ret <8 x i8> [[VCLZ_V_I]] int8x8_t test_vclz_s8(int8x8_t a) { return vclz_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclzq_s8( // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2 // CHECK: ret <16 x i8> [[VCLZQ_V_I]] int8x16_t test_vclzq_s8(int8x16_t a) { return vclzq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vclz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vclz_s16(int16x4_t a) { return vclz_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> -// CHECK: ret 
<8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLZQ_V1_I]] int16x8_t test_vclzq_s16(int16x8_t a) { return vclzq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vclz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vclz_s32(int32x2_t a) { return vclz_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLZQ_V1_I]] int32x4_t test_vclzq_s32(int32x4_t a) { return vclzq_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vclz_u8( // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2 // CHECK: ret <8 x i8> [[VCLZ_V_I]] uint8x8_t test_vclz_u8(uint8x8_t a) { return vclz_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclzq_u8( // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2 // CHECK: ret <16 x i8> [[VCLZQ_V_I]] uint8x16_t test_vclzq_u8(uint8x16_t a) { return vclzq_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vclz_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vclz_u16(uint16x4_t a) { return vclz_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclzq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLZQ_V1_I]] uint16x8_t test_vclzq_u16(uint16x8_t a) { return vclzq_u16(a); } -// CHECK-LABEL: define <2 x i32> 
@test_vclz_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vclz_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vclz_u32(uint32x2_t a) { return vclz_u32(a); } -// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclzq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLZQ_V1_I]] uint32x4_t test_vclzq_u32(uint32x4_t a) { return vclzq_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_s8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VCNT_V_I]] int8x8_t test_vcnt_s8(int8x8_t a) { return vcnt_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_s8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] int8x16_t test_vcntq_s8(int8x16_t a) { return vcntq_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_u8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VCNT_V_I]] uint8x8_t test_vcnt_u8(uint8x8_t a) { return vcnt_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_u8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] uint8x16_t test_vcntq_u8(uint8x16_t a) { return vcntq_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_p8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VCNT_V_I]] poly8x8_t test_vcnt_p8(poly8x8_t a) { return vcnt_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_p8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] poly8x16_t test_vcntq_p8(poly8x16_t a) { return vcntq_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_s8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> 
%a) #0 { +// CHECK-LABEL: @test_vmvn_s16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvn_s32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_u8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvn_u16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvn_u32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } -// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_p8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_p8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbit_s8( // CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VRBIT_I]] int8x8_t test_vrbit_s8(int8x8_t a) { return vrbit_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbitq_s8( // CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VRBIT_I]] int8x16_t test_vrbitq_s8(int8x16_t a) { return vrbitq_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrbit_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbit_u8( // CHECK: [[VRBIT_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VRBIT_I]] uint8x8_t test_vrbit_u8(uint8x8_t a) { return vrbit_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrbitq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbitq_u8( // CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VRBIT_I]] uint8x16_t test_vrbitq_u8(uint8x16_t a) { return vrbitq_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrbit_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbit_p8( // CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 // CHECK: ret <8 x i8> [[VRBIT_I]] poly8x8_t test_vrbit_p8(poly8x8_t a) { return vrbit_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrbitq_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrbitq_p8( // CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 // CHECK: ret <16 x i8> [[VRBIT_I]] poly8x16_t test_vrbitq_p8(poly8x16_t a) { return vrbitq_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[VMOVN_I]] int8x8_t test_vmovn_s16(int16x8_t a) { return vmovn_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[VMOVN_I]] int16x4_t test_vmovn_s32(int32x4_t a) { return vmovn_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[VMOVN_I]] int32x2_t test_vmovn_s64(int64x2_t a) { return vmovn_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[VMOVN_I]] uint8x8_t test_vmovn_u16(uint16x8_t a) { return vmovn_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[VMOVN_I]] uint16x4_t test_vmovn_u32(uint32x4_t a) { return vmovn_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> 
[[TMP1]] to <2 x i32> +// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[VMOVN_I]] uint32x2_t test_vmovn_u64(uint64x2_t a) { return vmovn_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vmovn_high_s16(int8x8_t a, int16x8_t b) { return vmovn_high_s16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] int16x8_t test_vmovn_high_s32(int16x4_t a, int32x4_t b) { return vmovn_high_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +// CHECK: [[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] int32x4_t test_vmovn_high_s64(int32x2_t a, int64x2_t b) { return vmovn_high_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vmovn_high_u16(int8x8_t a, int16x8_t b) { return vmovn_high_u16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] int16x8_t test_vmovn_high_u32(int16x4_t a, int32x4_t b) { return vmovn_high_u32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vmovn_high_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +// CHECK: 
[[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) { return vmovn_high_u64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %a) #2 // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]] int8x8_t test_vqmovun_s16(int16x8_t a) { return vqmovun_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a) #2 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]] int16x4_t test_vqmovun_s32(int32x4_t a) { return vqmovun_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %a) #2 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]] int32x2_t test_vqmovun_s64(int64x2_t a) { return vqmovun_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqmovun_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I_I]]) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b) #2 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) { return vqmovun_high_s16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqmovun_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I_I]]) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %b) #2 // CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> 
[[VQMOVUN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <4 x i16> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVUN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) { return vqmovun_high_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqmovun_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I_I]]) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %b) #2 // CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <2 x i32> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVUN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) { return vqmovun_high_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %a) #2 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] int8x8_t test_vqmovn_s16(int16x8_t a) { return vqmovn_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a) #2 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVN_V1_I]] int16x4_t test_vqmovn_s32(int32x4_t a) { return vqmovn_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %a) #2 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVN_V1_I]] int32x2_t test_vqmovn_s64(int64x2_t a) { return vqmovn_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_s16( // CHECK: [[TMP0:%.*]] = bitcast 
<8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %b) #2 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) { return vqmovn_high_s16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %b) #2 // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) { return vqmovn_high_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %b) #2 // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) { return vqmovn_high_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a) #2 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] uint8x8_t test_vqmovn_u16(uint16x8_t a) { return vqmovn_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %a) #2 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to 
<4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVN_V1_I]] uint16x4_t test_vqmovn_u32(uint32x4_t a) { return vqmovn_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %a) #2 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVN_V1_I]] uint32x2_t test_vqmovn_u64(uint64x2_t a) { return vqmovn_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %b) #2 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) { return vqmovn_high_u16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %b) #2 // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) { return vqmovn_high_u32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqmovn_high_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %b) #2 // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32> -// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> +// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) { return vqmovn_high_u64(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 
{ +// CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -2242,7 +2050,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 8); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> @@ -2252,7 +2060,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 16); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> @@ -2262,7 +2070,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 32); } -// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -2270,7 +2078,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 8); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> @@ -2280,7 +2088,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 16); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> @@ -2290,7 +2098,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 32); } -// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], @@ -2299,7 +2107,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 8); } -// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -2310,7 +2118,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 16); } -// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -2321,7 +2129,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 32); } -// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u8( // 
CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], @@ -2330,7 +2138,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 8); } -// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -2341,7 +2149,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 16); } -// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_high_n_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -2352,10 +2160,9 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 32); } -// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #2 +// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) #2 // CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> // CHECK: ret <4 x half> [[TMP1]] @@ -2363,10 +2170,9 @@ float16x4_t test_vcvt_f16_f32(float32x4_t a) { return vcvt_f16_f32(a); } -// CHECK-LABEL: define <8 x half> @test_vcvt_high_f16_f32(<4 x half> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcvt_high_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]]) #2 +// CHECK: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %b) #2 // CHECK: [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x half> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x half> %a, <4 x half> [[TMP1]], <8 x i32> @@ -2375,693 +2181,617 @@ float16x8_t test_vcvt_high_f16_f32(float16x4_t a, float32x4_t b) { return vcvt_high_f16_f32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcvt_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCVT_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +// CHECK: [[VCVT_I:%.*]] = fptrunc <2 x double> %a to <2 x float> // CHECK: ret <2 x float> [[VCVT_I]] float32x2_t test_vcvt_f32_f64(float64x2_t a) { return vcvt_f32_f64(a); } -// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vcvt_high_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 
x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCVT_I_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +// CHECK: [[VCVT_I_I:%.*]] = fptrunc <2 x double> %b to <2 x float> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVT_I_I]], <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I_I]] float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) { return vcvt_high_f32_f64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vcvtx_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTX_F32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I]]) #2 +// CHECK: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #2 // CHECK: ret <2 x float> [[VCVTX_F32_V1_I]] float32x2_t test_vcvtx_f32_f64(float64x2_t a) { return vcvtx_f32_f64(a); } -// CHECK-LABEL: define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vcvtx_high_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VCVTX_F32_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I_I]]) #2 +// CHECK: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #2 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I_I]] float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) { return vcvtx_high_f32_f64(a, b); } -// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vcvt_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #2 // CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: ret <4 x float> [[VCVT_F32_F161_I]] float32x4_t test_vcvt_f32_f16(float16x4_t a) { return vcvt_f32_f16(a); } -// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vcvt_high_f32_f16( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]]) #2 // CHECK: [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: ret <4 x float> [[VCVT_F32_F161_I_I]] float32x4_t test_vcvt_high_f32_f16(float16x8_t a) { return vcvt_high_f32_f16(a); } -// CHECK-LABEL: define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_f64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// 
CHECK: [[VCVT_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +// CHECK: [[VCVT_I:%.*]] = fpext <2 x float> %a to <2 x double> // CHECK: ret <2 x double> [[VCVT_I]] float64x2_t test_vcvt_f64_f32(float32x2_t a) { return vcvt_f64_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_high_f64_f32( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[SHUFFLE_I_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVT_I_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +// CHECK: [[VCVT_I_I:%.*]] = fpext <2 x float> [[SHUFFLE_I_I]] to <2 x double> // CHECK: ret <2 x double> [[VCVT_I_I]] float64x2_t test_vcvt_high_f64_f32(float32x4_t a) { return vcvt_high_f64_f32(a); } -// CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrndn_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> [[VRNDN_I]]) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x float> [[VRNDN1_I]] float32x2_t test_vrndn_f32(float32x2_t a) { return vrndn_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrndnq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> [[VRNDN_I]]) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x float> [[VRNDN1_I]] float32x4_t test_vrndnq_f32(float32x4_t a) { return vrndnq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vrndnq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> [[VRNDN_I]]) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #2 // CHECK: ret <2 x double> [[VRNDN1_I]] float64x2_t test_vrndnq_f64(float64x2_t a) { return vrndnq_f64(a); } -// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrnda_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[VRNDA_I]]) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x float> [[VRNDA1_I]] float32x2_t test_vrnda_f32(float32x2_t a) { return vrnda_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrndaq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[VRNDA_I]]) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x float> [[VRNDA1_I]] float32x4_t test_vrndaq_f32(float32x4_t 
a) { return vrndaq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vrndaq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]]) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a) #2 // CHECK: ret <2 x double> [[VRNDA1_I]] float64x2_t test_vrndaq_f64(float64x2_t a) { return vrndaq_f64(a); } -// CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrndp_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[VRNDP_I]]) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x float> [[VRNDP1_I]] float32x2_t test_vrndp_f32(float32x2_t a) { return vrndp_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrndpq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VRNDP_I]]) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x float> [[VRNDP1_I]] float32x4_t test_vrndpq_f32(float32x4_t a) { return vrndpq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vrndpq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]]) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #2 // CHECK: ret <2 x double> [[VRNDP1_I]] float64x2_t test_vrndpq_f64(float64x2_t a) { return vrndpq_f64(a); } -// CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrndm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_I]]) #2 +// CHECK: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x float> [[VRNDM1_I]] float32x2_t test_vrndm_f32(float32x2_t a) { return vrndm_f32(a); } -// CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrndmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDM_I]]) #2 +// CHECK: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x float> [[VRNDM1_I]] float32x4_t test_vrndmq_f32(float32x4_t a) { return vrndmq_f32(a); } -// CHECK-LABEL: define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { +// CHECK-LABEL: @test_vrndmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]]) #2 +// CHECK: [[VRNDM1_I:%.*]] = 
+// CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRNDM1_I]]
 float64x2_t test_vrndmq_f64(float64x2_t a) {
   return vrndmq_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndx_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[VRNDX_I]]) #2
+// CHECK: [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VRNDX1_I]]
 float32x2_t test_vrndx_f32(float32x2_t a) {
   return vrndx_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndxq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[VRNDX_I]]) #2
+// CHECK: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VRNDX1_I]]
 float32x4_t test_vrndxq_f32(float32x4_t a) {
   return vrndxq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndxq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]]) #2
+// CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRNDX1_I]]
 float64x2_t test_vrndxq_f64(float64x2_t a) {
   return vrndxq_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrnd_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[VRNDZ_I]]) #2
+// CHECK: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VRNDZ1_I]]
 float32x2_t test_vrnd_f32(float32x2_t a) {
   return vrnd_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VRNDZ_I]]) #2
+// CHECK: [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VRNDZ1_I]]
 float32x4_t test_vrndq_f32(float32x4_t a) {
   return vrndq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]]) #2
+// CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRNDZ1_I]]
 float64x2_t test_vrndq_f64(float64x2_t a) {
   return vrndq_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndi_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[VRNDI_I]]) #2
+// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VRNDI1_I]]
 float32x2_t test_vrndi_f32(float32x2_t a) {
   return vrndi_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrndiq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VRNDI_I]]) #2
+// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VRNDI1_I]]
 float32x4_t test_vrndiq_f32(float32x4_t a) {
   return vrndiq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrndiq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[VRNDI_I]]) #2
+// CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRNDI1_I]]
 float64x2_t test_vrndiq_f64(float64x2_t a) {
   return vrndiq_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP2:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptosi <2 x float> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
 int32x2_t test_vcvt_s32_f32(float32x2_t a) {
   return vcvt_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptosi <4 x float> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
 int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
   return vcvtq_s32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_s64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP2:%.*]] = fptosi <2 x double> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptosi <2 x double> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP1]]
 int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
   return vcvtq_s64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvt_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP2:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptoui <2 x float> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
 uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
   return vcvt_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptoui <4 x float> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
 uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
   return vcvtq_u32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_u64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP2:%.*]] = fptoui <2 x double> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK: [[TMP1:%.*]] = fptoui <2 x double> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP1]]
 uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
   return vcvtq_u64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTN1_I]]
 int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
   return vcvtn_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTN1_I]]
 int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
   return vcvtnq_s32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_s64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTN1_I]]
 int64x2_t test_vcvtnq_s64_f64(float64x2_t a) {
   return vcvtnq_s64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtn_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTN1_I]]
 uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
   return vcvtn_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTN1_I]]
 uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
   return vcvtnq_u32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtnq_u64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
+// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTN1_I]]
 uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) {
   return vcvtnq_u64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTP1_I]]
 int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
   return vcvtp_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTP1_I]]
 int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
   return vcvtpq_s32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_s64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTP1_I]]
 int64x2_t test_vcvtpq_s64_f64(float64x2_t a) {
   return vcvtpq_s64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtp_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTP1_I]]
 uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
   return vcvtp_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTP1_I]]
 uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
   return vcvtpq_u32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtpq_u64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
+// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTP1_I]]
 uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) {
   return vcvtpq_u64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTM1_I]]
 int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
   return vcvtm_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTM1_I]]
 int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
   return vcvtmq_s32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_s64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTM1_I]]
 int64x2_t test_vcvtmq_s64_f64(float64x2_t a) {
   return vcvtmq_s64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtm_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTM1_I]]
 uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
   return vcvtm_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTM1_I]]
 uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
   return vcvtmq_u32_f32(a);
 }
-// CHECK-LABEL: define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtmq_u64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
+// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTM1_I]]
 uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) {
   return vcvtmq_u64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvta_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTA1_I]]
 int32x2_t test_vcvta_s32_f32(float32x2_t a) {
   return vcvta_s32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_s32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTA1_I]]
 int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
   return vcvtaq_s32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_s64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTA1_I]]
 int64x2_t test_vcvtaq_s64_f64(float64x2_t a) {
   return vcvtaq_s64_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvta_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x i32> [[VCVTA1_I]]
 uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
   return vcvta_u32_f32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_u32_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x i32> [[VCVTA1_I]]
 uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
   return vcvtaq_u32_f32(a);
 }

-// CHECK-LABEL: define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vcvtaq_u64_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
+// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x i64> [[VCVTA1_I]]
 uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) {
   return vcvtaq_u64_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrsqrte_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #2
+// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
 float32x2_t test_vrsqrte_f32(float32x2_t a) {
   return vrsqrte_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrsqrteq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #2
+// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
 float32x4_t test_vrsqrteq_f32(float32x4_t a) {
   return vrsqrteq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrsqrteq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> [[VRSQRTEQ_V_I]]) #2
+// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRSQRTEQ_V1_I]]
 float64x2_t test_vrsqrteq_f64(float64x2_t a) {
   return vrsqrteq_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vrecpe_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #2
+// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VRECPE_V1_I]]
 float32x2_t test_vrecpe_f32(float32x2_t a) {
   return vrecpe_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #2
+// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
 float32x4_t test_vrecpeq_f32(float32x4_t a) {
   return vrecpeq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> [[VRECPEQ_V_I]]) #2
+// CHECK: [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VRECPEQ_V1_I]]
 float64x2_t test_vrecpeq_f64(float64x2_t a) {
   return vrecpeq_f64(a);
 }

-// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrecpe_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #2
+// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %a) #2
 // CHECK: ret <2 x i32> [[VRECPE_V1_I]]
 uint32x2_t test_vrecpe_u32(uint32x2_t a) {
   return vrecpe_u32(a);
 }

-// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vrecpeq_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #2
+// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %a) #2
 // CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
 uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
   return vrecpeq_u32(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
+// CHECK-LABEL: @test_vsqrt_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP1]]) #2
+// CHECK: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
 // CHECK: ret <2 x float> [[VSQRT_I]]
 float32x2_t test_vsqrt_f32(float32x2_t a) {
   return vsqrt_f32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
+// CHECK-LABEL: @test_vsqrtq_f32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]]) #2
+// CHECK: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
 // CHECK: ret <4 x float> [[VSQRT_I]]
 float32x4_t test_vsqrtq_f32(float32x4_t a) {
   return vsqrtq_f32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
+// CHECK-LABEL: @test_vsqrtq_f64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]]) #2
+// CHECK: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
 // CHECK: ret <2 x double> [[VSQRT_I]]
 float64x2_t test_vsqrtq_f64(float64x2_t a) {
   return vsqrtq_f64(a);
 }

-// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
 // CHECK: ret <2 x float> [[VCVT_I]]
 float32x2_t test_vcvt_f32_s32(int32x2_t a) {
   return vcvt_f32_s32(a);
 }
-// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvt_f32_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
 // CHECK: ret <2 x float> [[VCVT_I]]
 float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
   return vcvt_f32_u32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f32_s32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
 // CHECK: ret <4 x float> [[VCVT_I]]
 float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
   return vcvtq_f32_s32(a);
 }

-// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f32_u32(
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
 // CHECK: ret <4 x float> [[VCVT_I]]
 float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
   return vcvtq_f32_u32(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f64_s64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i64> %a to <2 x double>
 // CHECK: ret <2 x double> [[VCVT_I]]
 float64x2_t test_vcvtq_f64_s64(int64x2_t a) {
   return vcvtq_f64_s64(a);
 }

-// CHECK-LABEL: define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
+// CHECK-LABEL: @test_vcvtq_f64_u64(
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i64> %a to <2 x double>
 // CHECK: ret <2 x double> [[VCVT_I]]
 float64x2_t test_vcvtq_f64_u64(uint64x2_t a) {
   return vcvtq_f64_u64(a);
diff --git a/clang/test/CodeGen/aarch64-neon-perm.c b/clang/test/CodeGen/aarch64-neon-perm.c
index ca9f15d22c67..5b8a99c993fe 100644
--- a/clang/test/CodeGen/aarch64-neon-perm.c
+++ b/clang/test/CodeGen/aarch64-neon-perm.c
@@ -4,889 +4,889 @@
 // Test new aarch64 intrinsics and types
 #include

-// CHECK-LABEL: define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
   return vuzp1_s8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
   return vuzp1q_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
   return vuzp1_s16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
   return vuzp1q_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
   return vuzp1_s32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
   return vuzp1q_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_s64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
   return vuzp1q_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp1_u8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
   return vuzp1q_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp1_u16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
   return vuzp1q_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp1_u32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
   return vuzp1q_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_u64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
   return vuzp1q_u64(a, b);
 }
-// CHECK-LABEL: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
   return vuzp1_f32(a, b);
 }

-// CHECK-LABEL: define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
   return vuzp1q_f32(a, b);
 }

-// CHECK-LABEL: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_f64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
 // CHECK: ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
   return vuzp1q_f64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp1_p8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
   return vuzp1q_p8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp1_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp1q_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
   return vuzp1q_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
   return vuzp2_s8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
   return vuzp2q_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
   return vuzp2_s16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
   return vuzp2q_s16(a, b);
 }
-// CHECK-LABEL: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
   return vuzp2_s32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
   return vuzp2q_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_s64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
   return vuzp2q_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp2_u8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
   return vuzp2q_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp2_u16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
   return vuzp2q_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp2_u32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
   return vuzp2q_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_u64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
   return vuzp2q_u64(a, b);
 }

-// CHECK-LABEL: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
   return vuzp2_f32(a, b);
 }
-// CHECK-LABEL: define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
   return vuzp2q_f32(a, b);
 }

-// CHECK-LABEL: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_f64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
 // CHECK: ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
   return vuzp2q_f64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp2_p8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
   return vuzp2q_p8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp2_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vuzp2q_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
   return vuzp2q_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
   return vzip1_s8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
   return vzip1q_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
   return vzip1_s16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
   return vzip1q_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
   return vzip1_s32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
   return vzip1q_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_s64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
   return vzip1q_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
   return vzip1_u8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
   return vzip1q_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
   return vzip1_u16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
   return vzip1q_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
   return vzip1_u32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
   return vzip1q_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_u64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
   return vzip1q_u64(a, b);
 }

-// CHECK-LABEL: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip1_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
   return vzip1_f32(a, b);
 }

-// CHECK-LABEL: define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
   return vzip1q_f32(a, b);
 }

-// CHECK-LABEL: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_f64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
 // CHECK: ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
   return vzip1q_f64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
   return vzip1_p8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
   return vzip1q_p8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
   return vzip1_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip1q_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
   return vzip1q_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
   return vzip2_s8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
   return vzip2q_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
   return vzip2_s16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
   return vzip2q_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
   return vzip2_s32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
   return vzip2q_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_s64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
   return vzip2q_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
   return vzip2_u8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
   return vzip2q_u8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
   return vzip2_u16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
   return vzip2q_u16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
   return vzip2_u32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
   return vzip2q_u32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_u64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
   return vzip2q_u64(a, b);
 }

-// CHECK-LABEL: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip2_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: ret <2 x float> [[SHUFFLE_I]]
 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
   return vzip2_f32(a, b);
 }

-// CHECK-LABEL: define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_f32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: ret <4 x float> [[SHUFFLE_I]]
 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
   return vzip2q_f32(a, b);
 }

-// CHECK-LABEL: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_f64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
 // CHECK: ret <2 x double> [[SHUFFLE_I]]
 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
   return vzip2q_f64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
   return vzip2_p8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_p8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
   return vzip2q_p8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
   return vzip2_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vzip2q_p16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
   return vzip2q_p16(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
   return vtrn1_s8(a, b);
 }

-// CHECK-LABEL: define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
   return vtrn1q_s8(a, b);
 }

-// CHECK-LABEL: define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
   return vtrn1_s16(a, b);
 }

-// CHECK-LABEL: define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s16(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
   return vtrn1q_s16(a, b);
 }

-// CHECK-LABEL: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
   return vtrn1_s32(a, b);
 }

-// CHECK-LABEL: define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s32(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
   return vtrn1q_s32(a, b);
 }

-// CHECK-LABEL: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK-LABEL: @test_vtrn1q_s64(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
   return vtrn1q_s64(a, b);
 }

-// CHECK-LABEL: define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn1_u8(
 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) { return vtrn1_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) { return vtrn1q_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn1_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) { return vtrn1_u16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) { return vtrn1q_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn1_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) { return vtrn1_u32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) { return vtrn1q_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_u64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) { return vtrn1q_u64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vtrn1_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) { return vtrn1_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I]] float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) { return vtrn1q_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_f64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> // CHECK: ret <2 x double> [[SHUFFLE_I]] float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) { return vtrn1q_f64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn1_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) { return vtrn1_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x 
i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) { return vtrn1q_p8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn1_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) { return vtrn1_p16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn1q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) { return vtrn1q_p16(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) { return vtrn2_s8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) { return vtrn2q_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) { return vtrn2_s16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) { return vtrn2q_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn2_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) { return vtrn2_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) { return vtrn2q_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_s64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) { return vtrn2q_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) { return vtrn2_u8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t 
test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) { return vtrn2q_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) { return vtrn2_u16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) { return vtrn2q_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn2_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) { return vtrn2_u32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) { return vtrn2q_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_u64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) { return vtrn2q_u64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vtrn2_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) { return vtrn2_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I]] float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) { return vtrn2q_f32(a, b); } -// CHECK-LABEL: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_f64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> // CHECK: ret <2 x double> [[SHUFFLE_I]] float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) { return vtrn2q_f64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) { return vtrn2_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) { return vtrn2q_p8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: ret <4 x i16> 
[[SHUFFLE_I]] poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) { return vtrn2_p16(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn2q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) { return vtrn2q_p16(a, b); } -// CHECK-LABEL: define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzp_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 @@ -910,7 +910,7 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { return vuzp_s8(a, b); } -// CHECK-LABEL: define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzp_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 @@ -918,26 +918,25 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x 
<4 x i16>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.int16x4x2_t [[TMP10]] int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); } -// CHECK-LABEL: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) #0 { + +// CHECK-LABEL: @test_vuzp_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 @@ -945,26 +944,25 @@ int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 +// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.int32x2x2_t [[TMP10]] int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); } -// CHECK-LABEL: define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) #0 { + +// CHECK-LABEL: @test_vuzp_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 @@ 
-987,7 +985,8 @@ int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); } -// CHECK-LABEL: define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) #0 { + +// CHECK-LABEL: @test_vuzp_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 @@ -995,26 +994,25 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.uint16x4x2_t [[TMP10]] uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); } -// CHECK-LABEL: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) #0 { + +// CHECK-LABEL: @test_vuzp_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 @@ -1022,26 +1020,25 @@ uint16x4x2_t test_vuzp_u16(uint16x4_t a, 
uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 +// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.uint32x2x2_t [[TMP10]] uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); } -// CHECK-LABEL: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) #0 { + +// CHECK-LABEL: @test_vuzp_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 @@ -1049,26 +1046,25 @@ uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: store 
<2 x float> [[VUZP_I]], <2 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 +// CHECK: store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.float32x2x2_t [[TMP10]] float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); } -// CHECK-LABEL: define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) #0 { + +// CHECK-LABEL: @test_vuzp_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 @@ -1091,7 +1087,8 @@ float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { return vuzp_p8(a, b); } -// CHECK-LABEL: define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) #0 { + +// CHECK-LABEL: @test_vuzp_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 @@ -1099,26 +1096,25 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x 
i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8 +// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 +// CHECK: ret %struct.poly16x4x2_t [[TMP10]] poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); } -// CHECK-LABEL: define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 @@ -1141,7 +1137,8 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); } -// CHECK-LABEL: define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 @@ -1149,26 +1146,25 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: 
[[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 +// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.int16x8x2_t [[TMP10]] int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); } -// CHECK-LABEL: define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 @@ -1176,26 +1172,25 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, 
%struct.int32x4x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.int32x4x2_t [[TMP10]] int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); } -// CHECK-LABEL: define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 @@ -1218,7 +1213,8 @@ int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); } -// CHECK-LABEL: define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 @@ -1226,26 +1222,25 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 -// 
CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 +// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.uint16x8x2_t [[TMP10]] uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); } -// CHECK-LABEL: define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 @@ -1253,26 +1248,25 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: 
[[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.uint32x4x2_t [[TMP10]] uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); } -// CHECK-LABEL: define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 @@ -1280,26 +1274,25 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, 
%struct.float32x4x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.float32x4x2_t [[TMP10]] float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); } -// CHECK-LABEL: define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 @@ -1322,7 +1315,8 @@ float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); } -// CHECK-LABEL: define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vuzpq_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 @@ -1330,27 +1324,25 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, 
%struct.poly16x8x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 +// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.poly16x8x2_t [[TMP10]] poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); } -// CHECK-LABEL: define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vzip_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 @@ -1374,7 +1366,7 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { return vzip_s8(a, b); } -// CHECK-LABEL: define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vzip_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 @@ -1382,26 +1374,25 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 -// CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, 
i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP10]]
 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
   return vzip_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_s32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1409,26 +1400,25 @@ int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
-// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
+// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP10]]
 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
   return vzip_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1451,7 +1441,8 @@ int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
   return vzip_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1459,26 +1450,25 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
+// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP10]]
 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
   return vzip_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_u32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1486,26 +1476,25 @@ uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
-// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
+// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP10]]
 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
   return vzip_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_f32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1513,26 +1502,25 @@ uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32>
-// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
+// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP10]]
 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
   return vzip_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_p8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -1555,7 +1543,8 @@ float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
   return vzip_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzip_p16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -1563,26 +1552,25 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
+// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP10]]
 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
   return vzip_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -1605,7 +1593,8 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
   return vzipq_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -1613,26 +1602,25 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
-// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
+// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP10]]
 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
   return vzipq_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_s32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -1640,26 +1628,25 @@ int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP10]]
 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
   return vzipq_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -1682,7 +1669,8 @@ int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
   return vzipq_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -1690,26 +1678,25 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
-// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
+// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP10]]
 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
   return vzipq_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_u32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -1717,26 +1704,25 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP10]]
 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
   return vzipq_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_f32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -1744,26 +1730,25 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32>
-// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x2_t [[TMP10]]
 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
   return vzipq_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_p8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -1786,7 +1771,8 @@ float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
   return vzipq_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vzipq_p16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -1794,27 +1780,25 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
+// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
-// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
+// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x2_t [[TMP10]]
 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
   return vzipq_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK-LABEL: @test_vtrn_s8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -1838,7 +1822,7 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
   return vtrn_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK-LABEL: @test_vtrn_s16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -1846,26 +1830,25 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
+// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP10]]
 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
   return vtrn_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_s32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1873,26 +1856,25 @@ int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
-// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
+// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP10]]
 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
   return vtrn_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1915,7 +1897,8 @@ int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1923,26 +1906,25 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
+// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP10]]
 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_u32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1950,26 +1932,25 @@ uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32>
-// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32>
+// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP10]]
 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_f32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1977,26 +1958,25 @@ uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
 // CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32>
-// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
-// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32>
+// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
+// CHECK: store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP10]]
 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
   return vtrn_f32(a, b);
 }
-// CHECK-LABEL: define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_p8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -2019,7 +1999,8 @@ float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn_p8(a, b);
 }
-// CHECK-LABEL: define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrn_p16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -2027,26 +2008,25 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
-// CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
+// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
+// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP10]]
 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn_p16(a, b);
 }
-// CHECK-LABEL: define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -2069,7 +2049,8 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
   return vtrnq_s8(a, b);
 }
-// CHECK-LABEL: define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -2077,26 +2058,25 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
-// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
+// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP10]]
 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
   return vtrnq_s16(a, b);
 }
-// CHECK-LABEL: define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_s32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -2104,26 +2084,25 @@ int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP10]]
 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
   return vtrnq_s32(a, b);
 }
-// CHECK-LABEL: define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u8(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -2146,7 +2125,8 @@ int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
   return vtrnq_u8(a, b);
 }
-// CHECK-LABEL: define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u16(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -2154,26 +2134,25 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32>
-// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
+// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP10]]
 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
   return vtrnq_u16(a, b);
 }
-// CHECK-LABEL: define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_u32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -2181,26 +2160,25 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32>
-// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
+// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
+// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
+// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
+// CHECK: store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
+// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP10]]
 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   return vtrnq_u32(a, b);
 }
-// CHECK-LABEL: define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) #0 {
+
+// CHECK-LABEL: @test_vtrnq_f32(
 // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -2208,26 +2186,25 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
-// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32>
+// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
 // CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32>
-// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
-// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
-// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
-// CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
-// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
-// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
-// CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP12]]
+// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
+// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]]
+// CHECK: [[TMP5:%.*]]
= bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 +// CHECK: store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.float32x4x2_t [[TMP10]] float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); } -// CHECK-LABEL: define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) #0 { + +// CHECK-LABEL: @test_vtrnq_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 @@ -2250,7 +2227,8 @@ float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); } -// CHECK-LABEL: define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) #0 { + +// CHECK-LABEL: @test_vtrnq_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 @@ -2258,22 +2236,20 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> +// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2 -// CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 -// CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 -// CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 -// CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP12]] +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] +// CHECK: [[TMP5:%.*]] = bitcast 
%struct.poly16x8x2_t* [[RETVAL_I]] to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2 +// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 +// CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 +// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 +// CHECK: store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16 +// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 +// CHECK: ret %struct.poly16x8x2_t [[TMP10]] poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); } diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c index 39aab2540e8a..ac5a090fd2e6 100644 --- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -47,9 +47,7 @@ float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) { } // CHECK-LABEL: define <1 x double> @test_vmul_n_f64(<1 x double> %a, double %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP1]] to double +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %a to double // CHECK: [[TMP3:%.*]] = fmul double [[TMP2]], %b // CHECK: [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double> // CHECK: ret <1 x double> [[TMP4]] diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c index 762ca94e5e57..eadeda640434 100644 --- a/clang/test/CodeGen/aarch64-poly64.c +++ b/clang/test/CodeGen/aarch64-poly64.c @@ -23,11 +23,7 @@ uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) { } // CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b // CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64> // CHECK: ret <1 x i64> [[VTST_I]] @@ -36,11 +32,7 @@ uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) { } // CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b // CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> // CHECK: ret <2 x i64> [[VTST_I]] @@ -49,15 +41,9 @@ uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { } // CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: 
[[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, +// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { @@ -65,15 +51,9 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { } // CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> -// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], -// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] +// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, +// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) { diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c index 3625e63b17a0..7471b1c230c9 100644 --- a/clang/test/CodeGen/arm-neon-directed-rounding.c +++ b/clang/test/CodeGen/arm-neon-directed-rounding.c @@ -3,133 +3,85 @@ #include // CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDA_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> [[VRNDA_V_I]]) #2 -// CHECK: [[VRNDA_V2_I:%.*]] = bitcast <2 x float> [[VRNDA_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDA_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRNDA_V1_I]] float32x2_t test_vrnda_f32(float32x2_t a) { return vrnda_f32(a); } // CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDAQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> [[VRNDAQ_V_I]]) #2 -// CHECK: [[VRNDAQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDAQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDAQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDAQ_V1_I]] float32x4_t test_vrndaq_f32(float32x4_t a) { return vrndaq_f32(a); } // 
CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]]) #2 -// CHECK: [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRNDM_V1_I]] float32x2_t test_vrndm_f32(float32x2_t a) { return vrndm_f32(a); } // CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]]) #2 -// CHECK: [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDMQ_V1_I]] float32x4_t test_vrndmq_f32(float32x4_t a) { return vrndmq_f32(a); } // CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]]) #2 -// CHECK: [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRNDN_V1_I]] float32x2_t test_vrndn_f32(float32x2_t a) { return vrndn_f32(a); } // CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]]) #2 -// CHECK: [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDNQ_V1_I]] float32x4_t test_vrndnq_f32(float32x4_t a) { return vrndnq_f32(a); } // CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDP_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> [[VRNDP_V_I]]) #2 -// CHECK: [[VRNDP_V2_I:%.*]] = bitcast <2 x float> [[VRNDP_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDP_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRNDP_V1_I]] float32x2_t test_vrndp_f32(float32x2_t a) { return vrndp_f32(a); } // 
CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDPQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> [[VRNDPQ_V_I]]) #2 -// CHECK: [[VRNDPQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDPQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDPQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDPQ_V1_I]] float32x4_t test_vrndpq_f32(float32x4_t a) { return vrndpq_f32(a); } // CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> [[VRNDX_V_I]]) #2 -// CHECK: [[VRNDX_V2_I:%.*]] = bitcast <2 x float> [[VRNDX_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRNDX_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRNDX_V1_I]] float32x2_t test_vrndx_f32(float32x2_t a) { return vrndx_f32(a); } // CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> [[VRNDXQ_V_I]]) #2 -// CHECK: [[VRNDXQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDXQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDXQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDXQ_V1_I]] float32x4_t test_vrndxq_f32(float32x4_t a) { return vrndxq_f32(a); } // CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRND_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> [[VRND_V_I]]) #2 -// CHECK: [[VRND_V2_I:%.*]] = bitcast <2 x float> [[VRND_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VRND_V2_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP1]] +// CHECK: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a) #2 +// CHECK: ret <2 x float> [[VRND_V1_I]] float32x2_t test_vrnd_f32(float32x2_t a) { return vrnd_f32(a); } // CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> [[VRNDQ_V_I]]) #2 -// CHECK: [[VRNDQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VRNDQ_V2_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a) #2 +// CHECK: ret <4 x float> [[VRNDQ_V1_I]] float32x4_t test_vrndq_f32(float32x4_t a) { return vrndq_f32(a); } diff 
--git a/clang/test/CodeGen/arm-neon-fma.c b/clang/test/CodeGen/arm-neon-fma.c index ff6acbcc2e91..9311f6be6834 100644 --- a/clang/test/CodeGen/arm-neon-fma.c +++ b/clang/test/CodeGen/arm-neon-fma.c @@ -8,26 +8,14 @@ #include // CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %accum to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %lhs to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %rhs to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2 +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2 // CHECK: ret <2 x float> [[TMP6]] float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) { return vfma_f32(accum, lhs, rhs); } // CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %accum to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %lhs to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %rhs to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2 +// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2 // CHECK: ret <4 x float> [[TMP6]] float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) { return vfmaq_f32(accum, lhs, rhs); diff --git a/clang/test/CodeGen/arm-neon-numeric-maxmin.c b/clang/test/CodeGen/arm-neon-numeric-maxmin.c index 6e385b9c49ef..38f020a756df 100644 --- a/clang/test/CodeGen/arm-neon-numeric-maxmin.c +++ b/clang/test/CodeGen/arm-neon-numeric-maxmin.c @@ -3,53 +3,29 @@ #include // CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMAXNM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMAXNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> [[VMAXNM_V_I]], <2 x float> [[VMAXNM_V1_I]]) #2 -// CHECK: [[VMAXNM_V3_I:%.*]] = bitcast <2 x float> [[VMAXNM_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAXNM_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: ret <2 x float> [[VMAXNM_V2_I]] float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { return vmaxnm_f32(a, b); } // CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAXNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: 
[[VMAXNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> [[VMAXNMQ_V_I]], <4 x float> [[VMAXNMQ_V1_I]]) #2 -// CHECK: [[VMAXNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXNMQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXNMQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK: ret <4 x float> [[VMAXNMQ_V2_I]] float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { return vmaxnmq_f32(a, b); } // CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMINNM_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMINNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> [[VMINNM_V_I]], <2 x float> [[VMINNM_V1_I]]) #2 -// CHECK: [[VMINNM_V3_I:%.*]] = bitcast <2 x float> [[VMINNM_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMINNM_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2 +// CHECK: ret <2 x float> [[VMINNM_V2_I]] float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { return vminnm_f32(a, b); } // CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMINNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMINNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> [[VMINNMQ_V_I]], <4 x float> [[VMINNMQ_V1_I]]) #2 -// CHECK: [[VMINNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMINNMQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINNMQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2 +// CHECK: ret <4 x float> [[VMINNMQ_V2_I]] float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { return vminnmq_f32(a, b); } diff --git a/clang/test/CodeGen/arm-neon-vcvtX.c b/clang/test/CodeGen/arm-neon-vcvtX.c index 20cd97c858cb..4ea8fa874e48 100644 --- a/clang/test/CodeGen/arm-neon-vcvtX.c +++ b/clang/test/CodeGen/arm-neon-vcvtX.c @@ -3,144 +3,112 @@ #include // CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTA_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> [[VCVTA_S32_V_I]]) #2 +// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTA_S32_V1_I]] int32x2_t test_vcvta_s32_f32(float32x2_t a) { return vcvta_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTA_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: 
[[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> [[VCVTA_U32_V_I]]) #2 +// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTA_U32_V1_I]] uint32x2_t test_vcvta_u32_f32(float32x2_t a) { return vcvta_u32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTAQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> [[VCVTAQ_S32_V_I]]) #2 +// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTAQ_S32_V1_I]] int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { return vcvtaq_s32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTAQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> [[VCVTAQ_U32_V_I]]) #2 +// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTAQ_U32_V1_I]] uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { return vcvtaq_u32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTN_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> [[VCVTN_S32_V_I]]) #2 +// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTN_S32_V1_I]] int32x2_t test_vcvtn_s32_f32(float32x2_t a) { return vcvtn_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTN_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_U32_V_I]]) #2 +// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTN_U32_V1_I]] uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { return vcvtn_u32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTNQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> [[VCVTNQ_S32_V_I]]) #2 +// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTNQ_S32_V1_I]] int32x4_t test_vcvtnq_s32_f32(float32x4_t a) { return vcvtnq_s32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTNQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> [[VCVTNQ_U32_V_I]]) #2 +// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2 // 
CHECK: ret <4 x i32> [[VCVTNQ_U32_V1_I]] uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) { return vcvtnq_u32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTP_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> [[VCVTP_S32_V_I]]) #2 +// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTP_S32_V1_I]] int32x2_t test_vcvtp_s32_f32(float32x2_t a) { return vcvtp_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTP_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_U32_V_I]]) #2 +// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTP_U32_V1_I]] uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { return vcvtp_u32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTPQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> [[VCVTPQ_S32_V_I]]) #2 +// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTPQ_S32_V1_I]] int32x4_t test_vcvtpq_s32_f32(float32x4_t a) { return vcvtpq_s32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTPQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> [[VCVTPQ_U32_V_I]]) #2 +// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTPQ_U32_V1_I]] uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) { return vcvtpq_u32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTM_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> [[VCVTM_S32_V_I]]) #2 +// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTM_S32_V1_I]] int32x2_t test_vcvtm_s32_f32(float32x2_t a) { return vcvtm_s32_f32(a); } // CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTM_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_U32_V_I]]) #2 +// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2 // CHECK: ret <2 x i32> [[VCVTM_U32_V1_I]] uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { return vcvtm_u32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x 
float> %a to <16 x i8> -// CHECK: [[VCVTMQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> [[VCVTMQ_S32_V_I]]) #2 +// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTMQ_S32_V1_I]] int32x4_t test_vcvtmq_s32_f32(float32x4_t a) { return vcvtmq_s32_f32(a); } // CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTMQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> [[VCVTMQ_U32_V_I]]) #2 +// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2 // CHECK: ret <4 x i32> [[VCVTMQ_U32_V1_I]] uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) { return vcvtmq_u32_f32(a);