diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll index 3c8dae23718f..0bdb7120d0ef 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s @@ -43,8 +44,11 @@ define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { ; CHECK-LABEL: test_ld_from_poll_v16i8: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret entry: %b = add <16 x i8> %a, ret <16 x i8> %b @@ -52,8 +56,11 @@ entry: define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { ; CHECK-LABEL: test_ld_from_poll_v8i16: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret entry: %b = add <8 x i16> %a, ret <8 x i16> %b @@ -61,8 +68,11 @@ entry: define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { ; CHECK-LABEL: test_ld_from_poll_v4i32: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %b = add <4 x i32> %a, ret <4 x i32> %b @@ -70,8 +80,11 @@ entry: define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { ; CHECK-LABEL: test_ld_from_poll_v2i64: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %b = add <2 x i64> %a, ret <2 x i64> %b @@ -79,8 +92,11 @@ entry: define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { ; CHECK-LABEL: test_ld_from_poll_v4f32: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret entry: %b = fadd <4 x float> %a, ret <4 x float> %b @@ -88,8 +104,11 @@ entry: define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { ; CHECK-LABEL: test_ld_from_poll_v2f64: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret entry: %b = fadd <2 x double> %a, ret <2 x double> %b @@ -97,8 +116,11 @@ entry: define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { ; CHECK-LABEL: test_ld_from_poll_v8i8: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI6_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret entry: %b = add <8 x i8> %a, ret <8 x i8> %b @@ -106,8 +128,11 @@ entry: define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { ; CHECK-LABEL: test_ld_from_poll_v4i16: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret entry: %b = add <4 x i16> %a, ret <4 x i16> %b @@ -115,8 +140,11 @@ entry: define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { ; CHECK-LABEL: test_ld_from_poll_v2i32: -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret entry: %b = add <2 x i32> %a, ret <2 x i32> %b @@ -124,7 +152,9 @@ entry: define <16 x i8> @test_vld1q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld1q_dup_s8: -; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.16b }, [x0] +; CHECK-NEXT: ret entry: %0 = load i8, i8* %a, align 1 %1 = insertelement <16 x i8> undef, i8 %0, i32 0 @@ -134,7 +164,9 @@ entry: define <8 x i16> @test_vld1q_dup_s16(i16* %a) { ; CHECK-LABEL: test_vld1q_dup_s16: -; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.8h }, [x0] +; CHECK-NEXT: ret entry: %0 = load i16, i16* %a, align 2 %1 = insertelement <8 x i16> undef, i16 %0, i32 0 @@ -144,7 +176,9 @@ entry: define <4 x i32> @test_vld1q_dup_s32(i32* %a) { ; CHECK-LABEL: test_vld1q_dup_s32: -; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.4s }, [x0] +; CHECK-NEXT: ret entry: %0 = load i32, i32* %a, align 4 %1 = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -154,7 +188,9 @@ entry: define <2 x i64> @test_vld1q_dup_s64(i64* %a) { ; CHECK-LABEL: test_vld1q_dup_s64: -; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.2d }, [x0] +; CHECK-NEXT: ret entry: %0 = load i64, i64* %a, align 8 %1 = insertelement <2 x i64> undef, i64 %0, i32 0 @@ -164,7 +200,9 @@ entry: define <4 x float> @test_vld1q_dup_f32(float* %a) { ; CHECK-LABEL: test_vld1q_dup_f32: -; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.4s }, [x0] +; CHECK-NEXT: ret entry: %0 = load float, float* %a, align 4 %1 = insertelement <4 x float> undef, float %0, i32 0 @@ -174,7 +212,9 @@ entry: define <2 x double> @test_vld1q_dup_f64(double* %a) { ; CHECK-LABEL: test_vld1q_dup_f64: -; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.2d }, [x0] +; CHECK-NEXT: ret entry: %0 = load double, double* %a, align 8 %1 = insertelement <2 x double> undef, double %0, i32 0 @@ -184,7 +224,9 @@ entry: define <8 x i8> @test_vld1_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld1_dup_s8: -; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.8b }, [x0] +; CHECK-NEXT: ret entry: %0 = load i8, i8* %a, align 1 %1 = insertelement <8 x i8> undef, i8 %0, i32 0 @@ -194,7 +236,9 @@ entry: define <4 x i16> @test_vld1_dup_s16(i16* %a) { ; CHECK-LABEL: test_vld1_dup_s16: -; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.4h }, [x0] +; CHECK-NEXT: ret entry: %0 = load i16, i16* %a, align 2 %1 = insertelement <4 x i16> undef, i16 %0, i32 0 @@ -204,7 +248,9 @@ entry: define <2 x i32> @test_vld1_dup_s32(i32* %a) { ; CHECK-LABEL: test_vld1_dup_s32: -; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.2s }, [x0] +; CHECK-NEXT: ret entry: %0 = load i32, i32* %a, align 4 %1 = insertelement <2 x i32> undef, i32 %0, i32 0 @@ -214,7 +260,9 @@ entry: define <1 x i64> @test_vld1_dup_s64(i64* %a) { ; CHECK-LABEL: test_vld1_dup_s64: -; CHECK: ldr {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret entry: %0 = load i64, i64* %a, align 8 %1 = insertelement <1 x i64> undef, i64 %0, i32 0 @@ -223,7 +271,9 @@ entry: define <2 x float> @test_vld1_dup_f32(float* %a) { ; CHECK-LABEL: test_vld1_dup_f32: -; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1r { v0.2s }, [x0] +; CHECK-NEXT: ret entry: %0 = load float, float* %a, align 4 %1 = insertelement <2 x float> undef, float %0, i32 0 @@ -233,7 +283,9 @@ entry: define <1 x double> @test_vld1_dup_f64(double* %a) { ; CHECK-LABEL: test_vld1_dup_f64: -; CHECK: ldr {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret entry: %0 = load double, double* %a, align 8 %1 = insertelement <1 x double> undef, double %0, i32 0 @@ -244,9 +296,11 @@ define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { ; As there is a store operation depending on %1, LD1R pattern can't be selected. ; So LDR and FMOV should be emitted. ; CHECK-LABEL: testDUP.v1i64: -; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}} -; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret %1 = load i64, i64* %a, align 8 store i64 %1, i64* %b, align 8 %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 @@ -257,8 +311,10 @@ define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { ; As there is a store operation depending on %1, LD1R pattern can't be selected. ; So LDR and FMOV should be emitted. ; CHECK-LABEL: testDUP.v1f64: -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] -; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: str d0, [x1] +; CHECK-NEXT: ret %1 = load double, double* %a, align 8 store double %1, double* %b, align 8 %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 @@ -267,7 +323,9 @@ define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) { ; CHECK-LABEL: test_vld1q_lane_s8: -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.b }[15], [x0] +; CHECK-NEXT: ret entry: %0 = load i8, i8* %a, align 1 %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 @@ -276,7 +334,9 @@ entry: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) { ; CHECK-LABEL: test_vld1q_lane_s16: -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.h }[7], [x0] +; CHECK-NEXT: ret entry: %0 = load i16, i16* %a, align 2 %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 @@ -285,7 +345,9 @@ entry: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) { ; CHECK-LABEL: test_vld1q_lane_s32: -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.s }[3], [x0] +; CHECK-NEXT: ret entry: %0 = load i32, i32* %a, align 4 %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 @@ -294,7 +356,9 @@ entry: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) { ; CHECK-LABEL: test_vld1q_lane_s64: -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.d }[1], [x0] +; CHECK-NEXT: ret entry: %0 = load i64, i64* %a, align 8 %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1 @@ -303,7 +367,9 @@ entry: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) { ; CHECK-LABEL: test_vld1q_lane_f32: -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.s }[3], [x0] +; CHECK-NEXT: ret entry: %0 = load float, float* %a, align 4 %vld1_lane = insertelement <4 x float> %b, float %0, i32 3 @@ -312,7 +378,9 @@ entry: define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) { ; CHECK-LABEL: test_vld1q_lane_f64: -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.d }[1], [x0] +; CHECK-NEXT: ret entry: %0 = load double, double* %a, align 8 %vld1_lane = insertelement <2 x double> %b, double %0, i32 1 @@ -321,7 +389,11 @@ entry: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) { ; CHECK-LABEL: test_vld1_lane_s8: -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v0.b }[7], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = load i8, i8* %a, align 1 %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 @@ -330,7 +402,11 @@ entry: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) { ; CHECK-LABEL: test_vld1_lane_s16: -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v0.h }[3], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = load i16, i16* %a, align 2 %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 @@ -339,7 +415,11 @@ entry: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) { ; CHECK-LABEL: test_vld1_lane_s32: -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v0.s }[1], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = load i32, i32* %a, align 4 %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 @@ -348,7 +428,9 @@ entry: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) { ; CHECK-LABEL: test_vld1_lane_s64: -; CHECK: ldr {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret entry: %0 = load i64, i64* %a, align 8 %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 @@ -357,7 +439,11 @@ entry: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) { ; CHECK-LABEL: test_vld1_lane_f32: -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v0.s }[1], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = load float, float* %a, align 4 %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 @@ -366,7 +452,9 @@ entry: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) { ; CHECK-LABEL: test_vld1_lane_f64: -; CHECK: ldr {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret entry: %0 = load double, double* %a, align 8 %vld1_lane = insertelement <1 x double> undef, double %0, i32 0 @@ -375,7 +463,9 @@ entry: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) { ; CHECK-LABEL: test_vst1q_lane_s8: -; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.b }[15], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <16 x i8> %b, i32 15 store i8 %0, i8* %a, align 1 @@ -384,7 +474,9 @@ entry: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) { ; CHECK-LABEL: test_vst1q_lane_s16: -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.h }[7], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <8 x i16> %b, i32 7 store i16 %0, i16* %a, align 2 @@ -393,7 +485,9 @@ entry: define void @test_vst1q_lane0_s16(i16* %a, <8 x i16> %b) { ; CHECK-LABEL: test_vst1q_lane0_s16: -; CHECK: str {{h[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <8 x i16> %b, i32 0 store i16 %0, i16* %a, align 2 @@ -402,7 +496,9 @@ entry: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { ; CHECK-LABEL: test_vst1q_lane_s32: -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.s }[3], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x i32> %b, i32 3 store i32 %0, i32* %a, align 4 @@ -411,7 +507,9 @@ entry: define void @test_vst1q_lane0_s32(i32* %a, <4 x i32> %b) { ; CHECK-LABEL: test_vst1q_lane0_s32: -; CHECK: str {{s[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x i32> %b, i32 0 store i32 %0, i32* %a, align 4 @@ -420,7 +518,9 @@ entry: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { ; CHECK-LABEL: test_vst1q_lane_s64: -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.d }[1], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x i64> %b, i32 1 store i64 %0, i64* %a, align 8 @@ -429,7 +529,9 @@ entry: define void @test_vst1q_lane0_s64(i64* %a, <2 x i64> %b) { ; CHECK-LABEL: test_vst1q_lane0_s64: -; CHECK: str {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x i64> %b, i32 0 store i64 %0, i64* %a, align 8 @@ -438,7 +540,9 @@ entry: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { ; CHECK-LABEL: test_vst1q_lane_f32: -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.s }[3], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x float> %b, i32 3 store float %0, float* %a, align 4 @@ -447,7 +551,9 @@ entry: define void @test_vst1q_lane0_f32(float* %a, <4 x float> %b) { ; CHECK-LABEL: test_vst1q_lane0_f32: -; CHECK: str {{s[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x float> %b, i32 0 store float %0, float* %a, align 4 @@ -456,7 +562,9 @@ entry: define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { ; CHECK-LABEL: test_vst1q_lane_f64: -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: st1 { v0.d }[1], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x double> %b, i32 1 store double %0, double* %a, align 8 @@ -465,7 +573,9 @@ entry: define void @test_vst1q_lane0_f64(double* %a, <2 x double> %b) { ; CHECK-LABEL: test_vst1q_lane0_f64: -; CHECK: str {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x double> %b, i32 0 store double %0, double* %a, align 8 @@ -474,7 +584,10 @@ entry: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { ; CHECK-LABEL: test_vst1_lane_s8: -; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1 { v0.b }[7], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <8 x i8> %b, i32 7 store i8 %0, i8* %a, align 1 @@ -483,7 +596,10 @@ entry: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) { ; CHECK-LABEL: test_vst1_lane_s16: -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1 { v0.h }[3], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x i16> %b, i32 3 store i16 %0, i16* %a, align 2 @@ -492,7 +608,10 @@ entry: define void @test_vst1_lane0_s16(i16* %a, <4 x i16> %b) { ; CHECK-LABEL: test_vst1_lane0_s16: -; CHECK: str {{h[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <4 x i16> %b, i32 0 store i16 %0, i16* %a, align 2 @@ -501,7 +620,10 @@ entry: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { ; CHECK-LABEL: test_vst1_lane_s32: -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1 { v0.s }[1], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x i32> %b, i32 1 store i32 %0, i32* %a, align 4 @@ -510,7 +632,10 @@ entry: define void @test_vst1_lane0_s32(i32* %a, <2 x i32> %b) { ; CHECK-LABEL: test_vst1_lane0_s32: -; CHECK: str {{s[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x i32> %b, i32 0 store i32 %0, i32* %a, align 4 @@ -519,7 +644,10 @@ entry: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { ; CHECK-LABEL: test_vst1_lane_s64: -; CHECK: str {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <1 x i64> %b, i32 0 store i64 %0, i64* %a, align 8 @@ -528,7 +656,10 @@ entry: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) { ; CHECK-LABEL: test_vst1_lane_f32: -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: st1 { v0.s }[1], [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x float> %b, i32 1 store float %0, float* %a, align 4 @@ -537,7 +668,10 @@ entry: define void @test_vst1_lane0_f32(float* %a, <2 x float> %b) { ; CHECK-LABEL: test_vst1_lane0_f32: -; CHECK: str {{s[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <2 x float> %b, i32 0 store float %0, float* %a, align 4 @@ -546,7 +680,9 @@ entry: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { ; CHECK-LABEL: test_vst1_lane_f64: -; CHECK: str {{d[0-9]+}}, [x0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret entry: %0 = extractelement <1 x double> %b, i32 0 store double %0, double* %a, align 8