From c85766f79b2e2ebdb2a33e3456936cec11b10dc5 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Jun 2021 22:30:02 +0100 Subject: [PATCH] [ARM] MVE tests for vmull from a splat. NFC --- llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll | 1444 +++++++++++++++++++ 1 file changed, 1444 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll new file mode 100644 index 000000000000..418c56d7b1c1 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll @@ -0,0 +1,1444 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0246_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.s32 q1, q0, q2 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = sext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out1, %out2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0ext_0246: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.s32 q1, q2, q0 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = sext <2 x i32> %shuf2 to 
<2 x i64> + %out = mul <2 x i64> %out2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0246_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: umull lr, r12, r1, r0 +; CHECK-NEXT: umull r2, r5, r3, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r0, #31 +; CHECK-NEXT: mla r4, r1, r2, r12 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: mla r2, r3, r2, r5 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: mla r1, r1, r0, r4 +; CHECK-NEXT: mla r0, r3, r0, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %out1, %shuf2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_ext0_0246: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: asrs r4, r0, #31 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: umull lr, r12, r0, r1 +; CHECK-NEXT: umull r2, r5, r0, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r1, #31 +; CHECK-NEXT: mla r2, r0, r2, r12 +; CHECK-NEXT: mla r1, r4, r1, r2 +; CHECK-NEXT: asrs r2, r3, #31 +; CHECK-NEXT: mla r0, r0, r2, r5 +; CHECK-NEXT: mla r0, r4, r3, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement 
<2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %shuf2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_1357_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q1[2], q1[0], r0, r0 +; CHECK-NEXT: vrev64.32 q2, q0 +; CHECK-NEXT: vmullb.s32 q0, q2, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = sext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out1, %out2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0ext_1357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.s32 q0, q2, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = sext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_1357_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: umull lr, r12, r1, r0 +; CHECK-NEXT: umull r2, r5, r3, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r0, #31 +; CHECK-NEXT: mla r4, r1, r2, r12 +; CHECK-NEXT: asrs r1, r1, #31 +; 
CHECK-NEXT: mla r2, r3, r2, r5 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: mla r1, r1, r0, r4 +; CHECK-NEXT: mla r0, r3, r0, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %out1, %shuf2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_ext0_1357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: asrs r4, r0, #31 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: umull lr, r12, r0, r1 +; CHECK-NEXT: umull r2, r5, r0, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r1, #31 +; CHECK-NEXT: mla r2, r0, r2, r12 +; CHECK-NEXT: mla r1, r4, r1, r2 +; CHECK-NEXT: asrs r2, r3, #31 +; CHECK-NEXT: mla r0, r0, r2, r5 +; CHECK-NEXT: mla r0, r4, r3, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = sext <2 x i32> %shuf1 to <2 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %shuf2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0213_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; 
CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmullb.s32 q0, q2, q3 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmullb.s32 q1, q2, q3 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = sext <4 x i32> %shuf1 to <4 x i64> + %ins = insertelement <8 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i32> %shuf2 to <4 x i64> + %out = mul <4 x i64> %out1, %out2 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0ext_0213: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmullb.s32 q0, q3, q2 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmullb.s32 q1, q3, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = sext <4 x i32> %shuf1 to <4 x i64> + %ins = insertelement <8 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i32> %shuf2 to <4 x i64> + %out = mul <4 x i64> %out2, %out1 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_0213_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s0, s4 +; CHECK-NEXT: vmov.f32 s2, s5 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: 
umull r2, r5, r3, r0 +; CHECK-NEXT: umull lr, r12, r1, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r0, #31 +; CHECK-NEXT: mla r4, r1, r2, r12 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: mla r5, r3, r2, r5 +; CHECK-NEXT: asrs r3, r3, #31 +; CHECK-NEXT: mla r1, r1, r0, r4 +; CHECK-NEXT: vmov r4, s8 +; CHECK-NEXT: mla r3, r3, r0, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: umull r5, lr, r4, r0 +; CHECK-NEXT: umull r3, r12, r1, r0 +; CHECK-NEXT: vmov q1[2], q1[0], r5, r3 +; CHECK-NEXT: mla r3, r1, r2, r12 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: mla r2, r4, r2, lr +; CHECK-NEXT: mla r1, r1, r0, r3 +; CHECK-NEXT: asrs r3, r4, #31 +; CHECK-NEXT: mla r0, r3, r0, r2 +; CHECK-NEXT: vmov q1[3], q1[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = sext <4 x i32> %shuf1 to <4 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement <4 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer + %out = mul <4 x i64> %out1, %shuf2 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: sext32_ext0_0213: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: asrs r4, r0, #31 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s0, s4 +; CHECK-NEXT: vmov.f32 s2, s5 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: umull r2, r5, r0, r3 +; CHECK-NEXT: umull lr, r12, r0, r1 +; CHECK-NEXT: vmov q0[2], q0[0], r2, lr +; CHECK-NEXT: asrs r2, r1, #31 +; CHECK-NEXT: mla r2, r0, r2, r12 +; CHECK-NEXT: mla r1, r4, r1, r2 +; CHECK-NEXT: asrs r2, r3, #31 +; CHECK-NEXT: mla 
r2, r0, r2, r5 +; CHECK-NEXT: vmov r5, s8 +; CHECK-NEXT: mla r2, r4, r3, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: umull r3, lr, r0, r5 +; CHECK-NEXT: umull r2, r12, r0, r1 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 +; CHECK-NEXT: asrs r2, r1, #31 +; CHECK-NEXT: mla r2, r0, r2, r12 +; CHECK-NEXT: mla r1, r4, r1, r2 +; CHECK-NEXT: asrs r2, r5, #31 +; CHECK-NEXT: mla r0, r0, r2, lr +; CHECK-NEXT: mla r0, r4, r5, r0 +; CHECK-NEXT: vmov q1[3], q1[1], r0, r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = sext <4 x i32> %shuf1 to <4 x i64> + %ext = sext i32 %src2 to i64 + %ins = insertelement <4 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer + %out = mul <4 x i64> %shuf2, %out1 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0246_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.u32 q1, q0, q2 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = zext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out1, %out2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0ext_0246: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.u32 q1, q2, q0 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = 
shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = zext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0246_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: umull r1, r2, r1, r0 +; CHECK-NEXT: umull r0, r3, r3, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %out1, %shuf2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_ext0_0246: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: umull r1, r2, r0, r1 +; CHECK-NEXT: umull r0, r3, r0, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %shuf2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_1357_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q1[2], q1[0], r0, r0 +; CHECK-NEXT: vrev64.32 q2, q0 +; CHECK-NEXT: vmullb.u32 q0, q2, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 
= zext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = zext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out1, %out2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0ext_1357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: vmov q2[2], q2[0], r0, r0 +; CHECK-NEXT: vmullb.u32 q0, q2, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ins = insertelement <4 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer + %out2 = zext <2 x i32> %shuf2 to <2 x i64> + %out = mul <2 x i64> %out2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_1357_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: umull r1, r2, r1, r0 +; CHECK-NEXT: umull r0, r3, r3, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %out1, %shuf2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_ext0_1357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev64.32 q1, q0 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: umull r1, r2, r0, r1 +; CHECK-NEXT: umull r0, r3, r0, r3 +; 
CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3> + %out1 = zext <2 x i32> %shuf1 to <2 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <2 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer + %out = mul <2 x i64> %shuf2, %out1 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0213_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmullb.u32 q0, q2, q3 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmullb.u32 q1, q2, q3 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = zext <4 x i32> %shuf1 to <4 x i64> + %ins = insertelement <8 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i32> %shuf2 to <4 x i64> + %out = mul <4 x i64> %out1, %out2 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0ext_0213: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s10, s5 +; CHECK-NEXT: vmullb.u32 q0, q3, q2 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmullb.u32 q1, q3, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = zext <4 x i32> %shuf1 to 
<4 x i64> + %ins = insertelement <8 x i32> poison, i32 %src2, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i32> %shuf2 to <4 x i64> + %out = mul <4 x i64> %out2, %out1 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_0213_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s0, s4 +; CHECK-NEXT: vmov.f32 s2, s5 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: umull r3, r2, r3, r0 +; CHECK-NEXT: umull r1, r12, r1, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r3, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 +; CHECK-NEXT: umull r1, r2, r1, r0 +; CHECK-NEXT: umull r0, r3, r3, r0 +; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 +; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = zext <4 x i32> %shuf1 to <4 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <4 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer + %out = mul <4 x i64> %out1, %shuf2 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) { +; CHECK-LABEL: zext32_ext0_0213: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s5, s2 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f32 s0, s4 +; CHECK-NEXT: vmov.f32 s2, s5 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s10, s7 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: umull r3, r2, r0, r3 +; CHECK-NEXT: umull r1, r12, r0, r1 +; CHECK-NEXT: vmov q0[2], 
q0[0], r3, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 +; CHECK-NEXT: umull r1, r2, r0, r1 +; CHECK-NEXT: umull r0, r3, r0, r3 +; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 +; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> + %out1 = zext <4 x i32> %shuf1 to <4 x i64> + %ext = zext i32 %src2 to i64 + %ins = insertelement <4 x i64> poison, i64 %ext, i32 0 + %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer + %out = mul <4 x i64> %shuf2, %out1 + ret <4 x i64> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_02468101214_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out1, %out2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_0ext_02468101214: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.s16 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: 
sext16_02468101214_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %out1, %shuf2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_ext0_02468101214: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %shuf2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_13579111315_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vrev32.16 q0, q0 +; CHECK-NEXT: vmullb.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out1, %out2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_0ext_13579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev32.16 q0, q0 +; 
CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.s16 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = sext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_13579111315_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.s16 q0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %out1, %shuf2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_ext0_13579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.s16 q0, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %out1 = sext <4 x i16> %shuf1 to <4 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %shuf2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_02461357_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q2, r0 +; CHECK-NEXT: vrev32.16 q1, q0 +; CHECK-NEXT: vmullb.s16 q1, q1, q2 +; CHECK-NEXT: vmullb.s16 q0, 
q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + %out1 = sext <8 x i16> %shuf1 to <8 x i32> + %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i16> %shuf2 to <8 x i32> + %out = mul <8 x i32> %out1, %out2 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_0ext_02461357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev32.16 q1, q0 +; CHECK-NEXT: vdup.32 q2, r0 +; CHECK-NEXT: vmullb.s16 q1, q2, q1 +; CHECK-NEXT: vmullb.s16 q0, q2, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + %out1 = sext <8 x i16> %shuf1 to <8 x i32> + %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i16> %shuf2 to <8 x i32> + %out = mul <8 x i32> %out2, %out1 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_02461357_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q1, q0 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q2, q1, r0 +; CHECK-NEXT: vmovlt.s16 q0, q0 +; CHECK-NEXT: vmul.i32 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + %out1 = sext <8 x i16> %shuf1 to <8 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer + %out = mul <8 x i32> %out1, %shuf2 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: sext16_ext0_02461357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q1, q0 
+; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmul.i32 q2, q1, r0 +; CHECK-NEXT: vmovlt.s16 q0, q0 +; CHECK-NEXT: vmul.i32 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> + %out1 = sext <8 x i16> %shuf1 to <8 x i32> + %ext = sext i16 %src2 to i32 + %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer + %out = mul <8 x i32> %shuf2, %out1 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_02468101214_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out1, %out2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_0ext_02468101214: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.u16 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_02468101214_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: uxth r0, r0 +; 
CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %out1, %shuf2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_ext0_02468101214: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %shuf2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_13579111315_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vrev32.16 q0, q0 +; CHECK-NEXT: vmullb.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out1, %out2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_0ext_13579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev32.16 q0, q0 +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vmullb.u16 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = 
shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer + %out2 = zext <4 x i16> %shuf2 to <4 x i32> + %out = mul <4 x i32> %out2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_13579111315_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.u16 q0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %out1, %shuf2 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_ext0_13579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.u16 q0, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmul.i32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> + %out1 = zext <4 x i16> %shuf1 to <4 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer + %out = mul <4 x i32> %shuf2, %out1 + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_02461357_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q2, r0 +; CHECK-NEXT: vrev32.16 q1, q0 +; CHECK-NEXT: vmullb.u16 q1, q1, q2 +; CHECK-NEXT: vmullb.u16 q0, q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x 
i32> + %out1 = zext <8 x i16> %shuf1 to <8 x i32> + %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i16> %shuf2 to <8 x i32> + %out = mul <8 x i32> %out1, %out2 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_0ext_02461357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev32.16 q1, q0 +; CHECK-NEXT: vdup.32 q2, r0 +; CHECK-NEXT: vmullb.u16 q1, q2, q1 +; CHECK-NEXT: vmullb.u16 q0, q2, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> + %out1 = zext <8 x i16> %shuf1 to <8 x i32> + %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i16> %shuf2 to <8 x i32> + %out = mul <8 x i32> %out2, %out1 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_02461357_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q1, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmul.i32 q2, q1, r0 +; CHECK-NEXT: vmovlt.u16 q0, q0 +; CHECK-NEXT: vmul.i32 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> + %out1 = zext <8 x i16> %shuf1 to <8 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer + %out = mul <8 x i32> %out1, %shuf2 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) { +; CHECK-LABEL: zext16_ext0_02461357: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q1, q0 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: vmul.i32 q2, q1, r0 +; CHECK-NEXT: vmovlt.u16 q0, q0 +; 
CHECK-NEXT: vmul.i32 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> + %out1 = zext <8 x i16> %shuf1 to <8 x i32> + %ext = zext i16 %src2 to i32 + %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 + %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer + %out = mul <8 x i32> %shuf2, %out1 + ret <8 x i32> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_024681012141618202224262830_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out1, %out2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_0ext_024681012141618202224262830: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.s8 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_024681012141618202224262830_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, 
q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %out1, %shuf2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_ext0_024681012141618202224262830: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %shuf2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_135791113151719212325272931_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vrev16.8 q0, q0 +; CHECK-NEXT: vmullb.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out1, %out2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_0ext_135791113151719212325272931: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev16.8 q0, q0 +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.s8 q0, q1, q0 +; 
CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = sext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_135791113151719212325272931_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.s8 q0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %out1, %shuf2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_ext0_135791113151719212325272931: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.s8 q0, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = sext <8 x i8> %shuf1 to <8 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %shuf2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_0246810121413579111315_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q2, r0 +; CHECK-NEXT: vrev16.8 q1, q0 +; CHECK-NEXT: vmullb.s8 q1, q1, q2 +; CHECK-NEXT: vmullb.s8 
q0, q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = sext <16 x i8> %shuf1 to <16 x i16> + %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer + %out2 = sext <16 x i8> %shuf2 to <16 x i16> + %out = mul <16 x i16> %out1, %out2 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_0ext_0246810121413579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev16.8 q1, q0 +; CHECK-NEXT: vdup.16 q2, r0 +; CHECK-NEXT: vmullb.s8 q1, q2, q1 +; CHECK-NEXT: vmullb.s8 q0, q2, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = sext <16 x i8> %shuf1 to <16 x i16> + %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer + %out2 = sext <16 x i8> %shuf2 to <16 x i16> + %out = mul <16 x i16> %out2, %out1 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: sext8_0246810121413579111315_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q1, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q2, q1, r0 +; CHECK-NEXT: vmovlt.s8 q0, q0 +; CHECK-NEXT: vmul.i16 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = sext <16 x i8> %shuf1 to <16 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer + %out = mul <16 x i16> %out1, %shuf2 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: 
sext8_ext0_0246810121413579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q1, q0 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmul.i16 q2, q1, r0 +; CHECK-NEXT: vmovlt.s8 q0, q0 +; CHECK-NEXT: vmul.i16 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = sext <16 x i8> %shuf1 to <16 x i16> + %ext = sext i8 %src2 to i16 + %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer + %out = mul <16 x i16> %shuf2, %out1 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_024681012141618202224262830_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out1, %out2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_0ext_024681012141618202224262830: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.u8 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x 
i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_024681012141618202224262830_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %out1, %shuf2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_ext0_024681012141618202224262830: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %shuf2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_135791113151719212325272931_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vrev16.8 q0, q0 +; CHECK-NEXT: vmullb.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out1, %out2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, 
i8 %src2) { +; CHECK-LABEL: zext8_0ext_135791113151719212325272931: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev16.8 q0, q0 +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vmullb.u8 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer + %out2 = zext <8 x i8> %shuf2 to <8 x i16> + %out = mul <8 x i16> %out2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_135791113151719212325272931_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.u8 q0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %out1, %shuf2 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_ext0_135791113151719212325272931: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlt.u8 q0, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> + %out1 = zext <8 x i8> %shuf1 to <8 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer + %out = mul <8 x i16> %shuf2, %out1 + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) { +; 
CHECK-LABEL: zext8_0246810121413579111315_0ext: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q2, r0 +; CHECK-NEXT: vrev16.8 q1, q0 +; CHECK-NEXT: vmullb.u8 q1, q1, q2 +; CHECK-NEXT: vmullb.u8 q0, q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = zext <16 x i8> %shuf1 to <16 x i16> + %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer + %out2 = zext <16 x i8> %shuf2 to <16 x i16> + %out = mul <16 x i16> %out1, %out2 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_0ext_0246810121413579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrev16.8 q1, q0 +; CHECK-NEXT: vdup.16 q2, r0 +; CHECK-NEXT: vmullb.u8 q1, q2, q1 +; CHECK-NEXT: vmullb.u8 q0, q2, q0 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = zext <16 x i8> %shuf1 to <16 x i16> + %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 + %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer + %out2 = zext <16 x i8> %shuf2 to <16 x i16> + %out = mul <16 x i16> %out2, %out1 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_0246810121413579111315_ext0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q1, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q2, q1, r0 +; CHECK-NEXT: vmovlt.u8 q0, q0 +; CHECK-NEXT: vmul.i16 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = zext <16 x i8> %shuf1 to <16 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer 
+ %out = mul <16 x i16> %out1, %shuf2 + ret <16 x i16> %out +} + +define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) { +; CHECK-LABEL: zext8_ext0_0246810121413579111315: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q1, q0 +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: vmul.i16 q2, q1, r0 +; CHECK-NEXT: vmovlt.u8 q0, q0 +; CHECK-NEXT: vmul.i16 q1, q0, r0 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> + %out1 = zext <16 x i8> %shuf1 to <16 x i16> + %ext = zext i8 %src2 to i16 + %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 + %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer + %out = mul <16 x i16> %shuf2, %out1 + ret <16 x i16> %out +}