Reduce the number of attributes attached to each function
This patch takes advantage of the implicit default behavior to reduce the number of attributes attached to each function, which in turn reduces compilation time.

Reviewed By: serge-sans-paille

Differential Revision: https://reviews.llvm.org/D97116
This commit is contained in:
parent 0f279c7a5c
commit 053dc95839
@@ -493,7 +493,8 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   // 4. Width of vector arguments and return types for this function.
   // 5. Width of vector aguments and return types for functions called by this
   // function.
-  CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
+  if (LargestVectorWidth)
+    CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
 
   // If we generated an unreachable return block, delete it now.
   if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
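The hunk above is the pattern applied throughout the patch: an attribute is emitted only when its value differs from what consumers assume when the attribute is missing. A minimal sketch of that idea, assuming a missing "min-legal-vector-width" is treated the same as a value of 0 (the helper name below is hypothetical, not clang's actual interface):

// Illustrative sketch only: the helper name is hypothetical and this is not
// clang's actual interface. It shows the pattern the patch relies on: skip
// emitting an attribute whose value matches the implicit default, so most
// functions carry fewer attributes in the emitted IR.
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Function.h"

static void addMinLegalVectorWidthAttr(llvm::Function &F,
                                       unsigned LargestVectorWidth) {
  // Assumption: consumers read an absent "min-legal-vector-width" as 0,
  // so only non-zero widths need to be spelled out on the function.
  if (LargestVectorWidth)
    F.addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
}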
File diff suppressed because it is too large
@@ -1,3 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

@@ -6,414 +7,496 @@
#include <arm_neon.h>

// CHECK-LABEL: define{{.*}} float @test_vmuls_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
// CHECK: ret float [[MUL]]
// CHECK-LABEL: @test_vmuls_lane_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x float> [[B:%.*]], i32 1
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[A:%.*]], [[VGET_LANE]]
// CHECK-NEXT: ret float [[MUL]]
//
float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
  return vmuls_lane_f32(a, b, 1);
}

// CHECK-LABEL: define{{.*}} double @test_vmuld_lane_f64(double %a, <1 x double> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
|
||||
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
|
||||
// CHECK: ret double [[MUL]]
|
||||
// CHECK-LABEL: @test_vmuld_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[B:%.*]], i32 0
|
||||
// CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], [[VGET_LANE]]
|
||||
// CHECK-NEXT: ret double [[MUL]]
|
||||
//
|
||||
float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
|
||||
return vmuld_lane_f64(a, b, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
|
||||
// CHECK: ret float [[MUL]]
|
||||
// CHECK-LABEL: @test_vmuls_laneq_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[A:%.*]], [[VGETQ_LANE]]
|
||||
// CHECK-NEXT: ret float [[MUL]]
|
||||
//
|
||||
float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
|
||||
return vmuls_laneq_f32(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
|
||||
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
|
||||
// CHECK: ret double [[MUL]]
|
||||
// CHECK-LABEL: @test_vmuld_laneq_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], [[VGETQ_LANE]]
|
||||
// CHECK-NEXT: ret double [[MUL]]
|
||||
//
|
||||
float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
|
||||
return vmuld_laneq_f64(a, b, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vmul_n_f64(<1 x double> %a, double %b) #0 {
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %a to double
|
||||
// CHECK: [[TMP3:%.*]] = fmul double [[TMP2]], %b
|
||||
// CHECK: [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double>
|
||||
// CHECK: ret <1 x double> [[TMP4]]
|
||||
// CHECK-LABEL: @test_vmul_n_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to double
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = fmul double [[TMP0]], [[B:%.*]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to <1 x double>
|
||||
// CHECK-NEXT: ret <1 x double> [[TMP2]]
|
||||
//
|
||||
float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
|
||||
return vmul_n_f64(a, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float %a, <2 x float> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
|
||||
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
|
||||
// CHECK: ret float [[VMULXS_F32_I]]
|
||||
// CHECK-LABEL: @test_vmulxs_lane_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x float> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float [[A:%.*]], float [[VGET_LANE]]) [[ATTR5:#.*]]
|
||||
// CHECK-NEXT: ret float [[VMULXS_F32_I]]
|
||||
//
|
||||
float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
|
||||
return vmulxs_lane_f32(a, b, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
|
||||
// CHECK: ret float [[VMULXS_F32_I]]
|
||||
// CHECK-LABEL: @test_vmulxs_laneq_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float [[A:%.*]], float [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret float [[VMULXS_F32_I]]
|
||||
//
|
||||
float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
|
||||
return vmulxs_laneq_f32(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double %a, <1 x double> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
|
||||
// CHECK: ret double [[VMULXD_F64_I]]
|
||||
// CHECK-LABEL: @test_vmulxd_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[B:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[A:%.*]], double [[VGET_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret double [[VMULXD_F64_I]]
|
||||
//
|
||||
float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
|
||||
return vmulxd_lane_f64(a, b, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
|
||||
// CHECK: ret double [[VMULXD_F64_I]]
|
||||
// CHECK-LABEL: @test_vmulxd_laneq_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[A:%.*]], double [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret double [[VMULXD_F64_I]]
|
||||
//
|
||||
float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
|
||||
return vmulxd_laneq_f64(a, b, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
|
||||
// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
|
||||
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK: ret <1 x double> [[VSET_LANE]]
|
||||
// CHECK-LABEL: @test_vmulx_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x double> [[B:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE3]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
|
||||
//
|
||||
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
|
||||
return vmulx_lane_f64(a, b, 0);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
|
||||
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK: ret <1 x double> [[VSET_LANE]]
|
||||
// CHECK-LABEL: @test_vmulx_laneq_f64_0(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
|
||||
//
|
||||
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
|
||||
return vmulx_laneq_f64(a, b, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
|
||||
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK: ret <1 x double> [[VSET_LANE]]
|
||||
// CHECK-LABEL: @test_vmulx_laneq_f64_1(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A:%.*]], i32 0
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
|
||||
//
|
||||
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
|
||||
return vmulx_laneq_f64(a, b, 1);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} float @test_vfmas_lane_f32(float %a, float %b, <2 x float> %c) #0 {
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
|
||||
// CHECK: ret float [[TMP2]]
|
||||
// CHECK-LABEL: @test_vfmas_lane_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C:%.*]], i32 1
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[B:%.*]], float [[EXTRACT]], float [[A:%.*]])
|
||||
// CHECK-NEXT: ret float [[TMP0]]
|
||||
//
|
||||
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
|
||||
return vfmas_lane_f32(a, b, c, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} double @test_vfmad_lane_f64(double %a, double %b, <1 x double> %c) #0 {
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
|
||||
// CHECK: ret double [[TMP2]]
|
||||
// CHECK-LABEL: @test_vfmad_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[C:%.*]], i32 0
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B:%.*]], double [[EXTRACT]], double [[A:%.*]])
|
||||
// CHECK-NEXT: ret double [[TMP0]]
|
||||
//
|
||||
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
|
||||
return vfmad_lane_f64(a, b, c, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
|
||||
// CHECK: ret double [[TMP2]]
|
||||
// CHECK-LABEL: @test_vfmad_laneq_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[C:%.*]], i32 1
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B:%.*]], double [[EXTRACT]], double [[A:%.*]])
|
||||
// CHECK-NEXT: ret double [[TMP0]]
|
||||
//
|
||||
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
|
||||
return vfmad_laneq_f64(a, b, c, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fneg float %b
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
|
||||
// CHECK: ret float [[TMP2]]
|
||||
// CHECK-LABEL: @test_vfmss_lane_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[FNEG:%.*]] = fneg float [[B:%.*]]
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C:%.*]], i32 1
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[FNEG]], float [[EXTRACT]], float [[A:%.*]])
|
||||
// CHECK-NEXT: ret float [[TMP0]]
|
||||
//
|
||||
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
|
||||
return vfmss_lane_f32(a, b, c, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
|
||||
// CHECK: ret <1 x double> [[FMLA2]]
|
||||
// CHECK-LABEL: @test_vfma_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
|
||||
// CHECK-NEXT: ret <1 x double> [[FMLA2]]
|
||||
//
|
||||
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
|
||||
return vfma_lane_f64(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fneg <1 x double> %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
|
||||
// CHECK: ret <1 x double> [[FMLA2]]
|
||||
// CHECK-LABEL: @test_vfms_lane_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B:%.*]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
|
||||
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
|
||||
// CHECK-NEXT: ret <1 x double> [[FMLA2]]
|
||||
//
|
||||
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
|
||||
return vfms_lane_f64(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
||||
// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
|
||||
// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
|
||||
// CHECK: ret <1 x double> [[TMP7]]
|
||||
// CHECK-LABEL: @test_vfma_laneq_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
|
||||
// CHECK-NEXT: ret <1 x double> [[TMP7]]
|
||||
//
|
||||
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
|
||||
return vfma_laneq_f64(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
|
||||
// CHECK: [[SUB:%.*]] = fneg <1 x double> %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
||||
// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
|
||||
// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
|
||||
// CHECK: ret <1 x double> [[TMP7]]
|
||||
// CHECK-LABEL: @test_vfms_laneq_f64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B:%.*]]
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
|
||||
// CHECK-NEXT: ret <1 x double> [[TMP7]]
|
||||
//
|
||||
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
|
||||
return vfms_laneq_f64(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 %a, <4 x i16> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
|
||||
// CHECK: ret i32 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqdmullh_lane_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i32 [[TMP2]]
|
||||
//
|
||||
int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqdmullh_lane_s16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
|
||||
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
|
||||
// CHECK: ret i64 [[VQDMULLS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqdmulls_lane_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A:%.*]], i32 [[VGET_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i64 [[VQDMULLS_S32_I]]
|
||||
//
|
||||
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqdmulls_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
|
||||
// CHECK: ret i32 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqdmullh_laneq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 7
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i32 [[TMP2]]
|
||||
//
|
||||
int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqdmullh_laneq_s16(a, b, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
|
||||
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
|
||||
// CHECK: ret i64 [[VQDMULLS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqdmulls_laneq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A:%.*]], i32 [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i64 [[VQDMULLS_S32_I]]
|
||||
//
|
||||
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqdmulls_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
|
||||
// CHECK: ret i16 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqdmulhh_lane_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i16 [[TMP2]]
|
||||
//
|
||||
int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqdmulhh_lane_s16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
|
||||
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
|
||||
// CHECK: ret i32 [[VQDMULHS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqdmulhs_lane_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A:%.*]], i32 [[VGET_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i32 [[VQDMULHS_S32_I]]
|
||||
//
|
||||
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqdmulhs_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
|
||||
// CHECK: ret i16 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqdmulhh_laneq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 7
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i16 [[TMP2]]
|
||||
//
|
||||
int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqdmulhh_laneq_s16(a, b, 7);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
|
||||
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
|
||||
// CHECK: ret i32 [[VQDMULHS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqdmulhs_laneq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A:%.*]], i32 [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i32 [[VQDMULHS_S32_I]]
|
||||
//
|
||||
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqdmulhs_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
|
||||
// CHECK: ret i16 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqrdmulhh_lane_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i16 [[TMP2]]
|
||||
//
|
||||
int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
|
||||
return vqrdmulhh_lane_s16(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
|
||||
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
|
||||
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqrdmulhs_lane_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A:%.*]], i32 [[VGET_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i32 [[VQRDMULHS_S32_I]]
|
||||
//
|
||||
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
|
||||
return vqrdmulhs_lane_s32(a, b, 1);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
|
||||
// CHECK: ret i16 [[TMP4]]
|
||||
// CHECK-LABEL: @test_vqrdmulhh_laneq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 7
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
|
||||
// CHECK-NEXT: ret i16 [[TMP2]]
|
||||
//
|
||||
int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
|
||||
return vqrdmulhh_laneq_s16(a, b, 7);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
|
||||
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
|
||||
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
|
||||
// CHECK-LABEL: @test_vqrdmulhs_laneq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A:%.*]], i32 [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: ret i32 [[VQRDMULHS_S32_I]]
|
||||
//
|
||||
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
|
||||
return vqrdmulhs_laneq_s32(a, b, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
|
||||
// CHECK: ret i32 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlalh_lane_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
|
||||
// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A:%.*]], i32 [[LANE0]])
|
||||
// CHECK-NEXT: ret i32 [[VQDMLXL1]]
|
||||
//
|
||||
int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
|
||||
return vqdmlalh_lane_s16(a, b, c, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
|
||||
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
|
||||
// CHECK: ret i64 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlals_lane_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B:%.*]], i32 [[LANE]])
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A:%.*]], i64 [[VQDMLXL]])
|
||||
// CHECK-NEXT: ret i64 [[VQDMLXL1]]
|
||||
//
|
||||
int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
|
||||
return vqdmlals_lane_s32(a, b, c, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
|
||||
// CHECK: ret i32 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlalh_laneq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
|
||||
// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A:%.*]], i32 [[LANE0]])
|
||||
// CHECK-NEXT: ret i32 [[VQDMLXL1]]
|
||||
//
|
||||
int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
|
||||
return vqdmlalh_laneq_s16(a, b, c, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
|
||||
// CHECK: ret i64 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlals_laneq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B:%.*]], i32 [[LANE]])
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A:%.*]], i64 [[VQDMLXL]])
|
||||
// CHECK-NEXT: ret i64 [[VQDMLXL1]]
|
||||
//
|
||||
int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
|
||||
return vqdmlals_laneq_s32(a, b, c, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
|
||||
// CHECK: ret i32 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlslh_lane_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
|
||||
// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A:%.*]], i32 [[LANE0]])
|
||||
// CHECK-NEXT: ret i32 [[VQDMLXL1]]
|
||||
//
|
||||
int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
|
||||
return vqdmlslh_lane_s16(a, b, c, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
|
||||
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
|
||||
// CHECK: ret i64 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlsls_lane_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B:%.*]], i32 [[LANE]])
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A:%.*]], i64 [[VQDMLXL]])
|
||||
// CHECK-NEXT: ret i64 [[VQDMLXL1]]
|
||||
//
|
||||
int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
|
||||
return vqdmlsls_lane_s32(a, b, c, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
|
||||
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
|
||||
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
|
||||
// CHECK: ret i32 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlslh_laneq_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
|
||||
// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A:%.*]], i32 [[LANE0]])
|
||||
// CHECK-NEXT: ret i32 [[VQDMLXL1]]
|
||||
//
|
||||
int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
|
||||
return vqdmlslh_laneq_s16(a, b, c, 7);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
|
||||
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
|
||||
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
|
||||
// CHECK: ret i64 [[VQDMLXL1]]
|
||||
// CHECK-LABEL: @test_vqdmlsls_laneq_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
|
||||
// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B:%.*]], i32 [[LANE]])
|
||||
// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A:%.*]], i64 [[VQDMLXL]])
|
||||
// CHECK-NEXT: ret i64 [[VQDMLXL1]]
|
||||
//
|
||||
int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
|
||||
return vqdmlsls_laneq_s32(a, b, c, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #0 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
|
||||
// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
|
||||
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK: ret <1 x double> [[VSET_LANE]]
|
||||
// CHECK-LABEL: @test_vmulx_lane_f64_0(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
|
||||
// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE8]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
|
||||
//
|
||||
float64x1_t test_vmulx_lane_f64_0() {
|
||||
float64x1_t arg1;
|
||||
float64x1_t arg2;
|
||||
|
@@ -425,15 +508,17 @@ float64x1_t test_vmulx_lane_f64_0() {
  return result;
}

// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
|
||||
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
|
||||
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
|
||||
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
|
||||
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
|
||||
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK: ret <1 x double> [[VSET_LANE]]
|
||||
// CHECK-LABEL: @test_vmulx_laneq_f64_2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
|
||||
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
|
||||
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
|
||||
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
|
||||
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) [[ATTR5]]
|
||||
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
|
||||
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
|
||||
//
|
||||
float64x1_t test_vmulx_laneq_f64_2() {
|
||||
float64x1_t arg1;
|
||||
float64x1_t arg2;
|
||||
|
@@ -1,3 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \

@@ -12,238 +13,300 @@

#include <arm_neon.h>

// CHECK-LABEL: define{{.*}} void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
|
||||
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK: store i128 %val, i128* [[TMP1]]
|
||||
// CHECK: ret void
|
||||
// CHECK-LABEL: @test_vstrq_p128(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK-NEXT: store i128 [[VAL:%.*]], i128* [[TMP1]], align 16
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
|
||||
vstrq_p128(ptr, val);
|
||||
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vldrq_p128(i128* %ptr) #0 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
|
||||
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]]
|
||||
// CHECK: ret i128 [[TMP2]]
|
||||
// CHECK-LABEL: @test_vldrq_p128(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP1]], align 16
|
||||
// CHECK-NEXT: ret i128 [[TMP2]]
|
||||
//
|
||||
poly128_t test_vldrq_p128(poly128_t * ptr) {
|
||||
return vldrq_p128(ptr);
|
||||
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} void @test_ld_st_p128(i128* %ptr) #0 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
|
||||
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]]
|
||||
// CHECK: [[ADD_PTR:%.*]] = getelementptr inbounds i128, i128* %ptr, i64 1
|
||||
// CHECK: [[TMP3:%.*]] = bitcast i128* [[ADD_PTR]] to i8*
|
||||
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i128*
|
||||
// CHECK: store i128 [[TMP2]], i128* [[TMP4]]
|
||||
// CHECK: ret void
|
||||
// CHECK-LABEL: @test_ld_st_p128(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP1]], align 16
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i128, i128* [[PTR]], i64 1
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i128* [[ADD_PTR]] to i8*
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i128*
|
||||
// CHECK-NEXT: store i128 [[TMP2]], i128* [[TMP4]], align 16
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_ld_st_p128(poly128_t * ptr) {
|
||||
vstrq_p128(ptr+1, vldrq_p128(ptr));
|
||||
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vmull_p64(i64 %a, i64 %b) #0 {
|
||||
// CHECK: [[VMULL_P64_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) #3
|
||||
// CHECK: [[VMULL_P641_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I]] to i128
|
||||
// CHECK: ret i128 [[VMULL_P641_I]]
|
||||
// CHECK-LABEL: @test_vmull_p64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VMULL_P64_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[A:%.*]], i64 [[B:%.*]]) [[ATTR3:#.*]]
|
||||
// CHECK-NEXT: [[VMULL_P641_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I]] to i128
|
||||
// CHECK-NEXT: ret i128 [[VMULL_P641_I]]
|
||||
//
|
||||
poly128_t test_vmull_p64(poly64_t a, poly64_t b) {
|
||||
return vmull_p64(a, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #1 {
|
||||
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to i64
|
||||
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <2 x i64> %b, <2 x i64> %b, <1 x i32> <i32 1>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I7_I]] to i64
|
||||
// CHECK: [[VMULL_P64_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[TMP0]], i64 [[TMP1]]) #3
|
||||
// CHECK: [[VMULL_P641_I_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I_I]] to i128
|
||||
// CHECK: ret i128 [[VMULL_P641_I_I]]
|
||||
// CHECK-LABEL: @test_vmull_high_p64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[A]], <1 x i32> <i32 1>
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to i64
|
||||
// CHECK-NEXT: [[SHUFFLE_I7_I:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> [[B]], <1 x i32> <i32 1>
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I7_I]] to i64
|
||||
// CHECK-NEXT: [[VMULL_P64_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[TMP0]], i64 [[TMP1]]) [[ATTR3]]
|
||||
// CHECK-NEXT: [[VMULL_P641_I_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I_I]] to i128
|
||||
// CHECK-NEXT: ret i128 [[VMULL_P641_I_I]]
|
||||
//
|
||||
poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) {
|
||||
return vmull_high_p64(a, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_s8(<16 x i8> %a) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
|
||||
// CHECK: ret i128 [[TMP0]]
|
||||
// CHECK-LABEL: @test_vreinterpretq_p128_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to i128
|
||||
// CHECK-NEXT: ret i128 [[TMP0]]
|
||||
//
|
||||
poly128_t test_vreinterpretq_p128_s8(int8x16_t a) {
|
||||
return vreinterpretq_p128_s8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_s16(<8 x i16> %a) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
|
||||
// CHECK: ret i128 [[TMP0]]
|
||||
// CHECK-LABEL: @test_vreinterpretq_p128_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to i128
|
||||
// CHECK-NEXT: ret i128 [[TMP0]]
|
||||
//
|
||||
poly128_t test_vreinterpretq_p128_s16(int16x8_t a) {
|
||||
return vreinterpretq_p128_s16(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_s32(<4 x i32> %a) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128
|
||||
// CHECK: ret i128 [[TMP0]]
|
||||
// CHECK-LABEL: @test_vreinterpretq_p128_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
|
||||
// CHECK-NEXT: ret i128 [[TMP0]]
|
||||
//
|
||||
poly128_t test_vreinterpretq_p128_s32(int32x4_t a) {
|
||||
return vreinterpretq_p128_s32(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_s64(<2 x i64> %a) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
|
||||
// CHECK: ret i128 [[TMP0]]
|
||||
// CHECK-LABEL: @test_vreinterpretq_p128_s64(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to i128
|
||||
// CHECK-NEXT: ret i128 [[TMP0]]
|
||||
//
|
||||
poly128_t test_vreinterpretq_p128_s64(int64x2_t a) {
|
||||
return vreinterpretq_p128_s64(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_u8(<16 x i8> %a) #1 {
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
|
||||
// CHECK: ret i128 [[TMP0]]
|
||||
// CHECK-LABEL: @test_vreinterpretq_p128_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to i128
|
||||
// CHECK-NEXT: ret i128 [[TMP0]]
|
||||
//
|
||||
poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) {
|
||||
return vreinterpretq_p128_u8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) {
  return vreinterpretq_p128_u16(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) {
  return vreinterpretq_p128_u32(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) {
  return vreinterpretq_p128_u64(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_f32(float32x4_t a) {
  return vreinterpretq_p128_f32(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_f64(<2 x double> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_f64(float64x2_t a) {
  return vreinterpretq_p128_f64(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_p8(<16 x i8> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) {
  return vreinterpretq_p128_p8(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_p16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) {
  return vreinterpretq_p128_p16(a);
}

// CHECK-LABEL: define{{.*}} i128 @test_vreinterpretq_p128_p64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
// CHECK: ret i128 [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p128_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) {
  return vreinterpretq_p128_p64(a);
}

// CHECK-LABEL: define{{.*}} <16 x i8> @test_vreinterpretq_s8_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_s8_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
int8x16_t test_vreinterpretq_s8_p128(poly128_t a) {
  return vreinterpretq_s8_p128(a);
}

// CHECK-LABEL: define{{.*}} <8 x i16> @test_vreinterpretq_s16_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_s16_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vreinterpretq_s16_p128(poly128_t a) {
  return vreinterpretq_s16_p128(a);
}

// CHECK-LABEL: define{{.*}} <4 x i32> @test_vreinterpretq_s32_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_s32_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vreinterpretq_s32_p128(poly128_t a) {
  return vreinterpretq_s32_p128(a);
}

// CHECK-LABEL: define{{.*}} <2 x i64> @test_vreinterpretq_s64_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_s64_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
int64x2_t test_vreinterpretq_s64_p128(poly128_t a) {
  return vreinterpretq_s64_p128(a);
}

// CHECK-LABEL: define{{.*}} <16 x i8> @test_vreinterpretq_u8_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_u8_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) {
  return vreinterpretq_u8_p128(a);
}

// CHECK-LABEL: define{{.*}} <8 x i16> @test_vreinterpretq_u16_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_u16_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) {
  return vreinterpretq_u16_p128(a);
}

// CHECK-LABEL: define{{.*}} <4 x i32> @test_vreinterpretq_u32_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_u32_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) {
  return vreinterpretq_u32_p128(a);
}

// CHECK-LABEL: define{{.*}} <2 x i64> @test_vreinterpretq_u64_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_u64_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) {
  return vreinterpretq_u64_p128(a);
}

// CHECK-LABEL: define{{.*}} <4 x float> @test_vreinterpretq_f32_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_f32_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vreinterpretq_f32_p128(poly128_t a) {
  return vreinterpretq_f32_p128(a);
}

// CHECK-LABEL: define{{.*}} <2 x double> @test_vreinterpretq_f64_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_f64_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[TMP0]]
//
float64x2_t test_vreinterpretq_f64_p128(poly128_t a) {
  return vreinterpretq_f64_p128(a);
}

// CHECK-LABEL: define{{.*}} <16 x i8> @test_vreinterpretq_p8_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p8_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) {
  return vreinterpretq_p8_p128(a);
}

// CHECK-LABEL: define{{.*}} <8 x i16> @test_vreinterpretq_p16_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p16_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) {
  return vreinterpretq_p16_p128(a);
}

// CHECK-LABEL: define{{.*}} <2 x i64> @test_vreinterpretq_p64_p128(i128 %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
// CHECK-LABEL: @test_vreinterpretq_p64_p128(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
poly64x2_t test_vreinterpretq_p64_p128(poly128_t a) {
  return vreinterpretq_p64_p128(a);
}

@@ -600,4 +600,4 @@ poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
// CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0"
// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"="0"

@@ -5,7 +5,7 @@
struct CtorWithClosure {
  __declspec(dllexport) CtorWithClosure(...) {}
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosure@@QAEXXZ"({{.*}}) comdat
// CHECK: %[[this_addr:.*]] = alloca %struct.CtorWithClosure*, align 4
// CHECK: store %struct.CtorWithClosure* %this, %struct.CtorWithClosure** %[[this_addr]], align 4
// CHECK: %[[this:.*]] = load %struct.CtorWithClosure*, %struct.CtorWithClosure** %[[this_addr]]
@@ -17,7 +17,7 @@ struct CtorWithClosureOutOfLine {
__declspec(dllexport) CtorWithClosureOutOfLine(...);
};
CtorWithClosureOutOfLine::CtorWithClosureOutOfLine(...) {}
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FCtorWithClosureOutOfLine@@QAEXXZ"({{.*}}) comdat

#define DELETE_IMPLICIT_MEMBERS(ClassName) \
    ClassName(ClassName &&) = delete; \
@@ -28,7 +28,7 @@ CtorWithClosureOutOfLine::CtorWithClosureOutOfLine(...) {}
struct __declspec(dllexport) ClassWithClosure {
  DELETE_IMPLICIT_MEMBERS(ClassWithClosure);
  ClassWithClosure(...) {}
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FClassWithClosure@@QAEXXZ"({{.*}}) comdat
// CHECK: %[[this_addr:.*]] = alloca %struct.ClassWithClosure*, align 4
// CHECK: store %struct.ClassWithClosure* %this, %struct.ClassWithClosure** %[[this_addr]], align 4
// CHECK: %[[this:.*]] = load %struct.ClassWithClosure*, %struct.ClassWithClosure** %[[this_addr]]
@@ -44,10 +44,10 @@ template struct __declspec(dllexport) TemplateWithClosure<char>;
extern template struct TemplateWithClosure<int>;
template struct __declspec(dllexport) TemplateWithClosure<int>;

// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) comdat
// CHECK: call {{.*}} @"??0?$TemplateWithClosure@D@@QAE@H@Z"({{.*}}, i32 1)

// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat
// CHECK: call {{.*}} @"??0?$TemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 4)

template <typename T> struct __declspec(dllexport) ExportedTemplateWithClosure {
@@ -55,7 +55,7 @@ template <typename T> struct __declspec(dllexport) ExportedTemplateWithClosure {
};
template <> ExportedTemplateWithClosure<int>::ExportedTemplateWithClosure(int); // Don't try to emit the closure for a declaration.
template <> ExportedTemplateWithClosure<int>::ExportedTemplateWithClosure(int) {};
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$ExportedTemplateWithClosure@H@@QAEXXZ"({{.*}}) comdat
// CHECK: call {{.*}} @"??0?$ExportedTemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 4)

struct __declspec(dllexport) NestedOuter {
@@ -67,8 +67,8 @@ struct __declspec(dllexport) NestedOuter {
};
};

// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedOuter@@QAEXXZ"({{.*}}) comdat
// CHECK-LABEL: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) comdat

struct HasDtor {
  ~HasDtor();
@@ -535,7 +535,7 @@ struct SomeTemplate {
// MSVC2013-DAG: define weak_odr dso_local dllexport {{.+}} @"??4?$SomeTemplate@H@@Q{{.+}}0@A{{.+}}0@@Z"
struct __declspec(dllexport) InheritFromTemplate : SomeTemplate<int> {};

// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) comdat

namespace PR23801 {
template <typename>
@@ -552,7 +552,7 @@ struct __declspec(dllexport) B {
};
}
//
// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat
// M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??_FB@PR23801@@QAEXXZ"({{.*}}) comdat

struct __declspec(dllexport) T {
  // Copy assignment operator: