; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
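; Each test below widens both operands (sext for the 's' functions, zext for
; the 'u' functions), multiplies, shifts the product right by the source
; element width and truncates back down, i.e. a multiply returning the high
; half of the result. The assertions check how these are lowered to MVE
; vmull-based sequences.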
define arm_aapcs_vfpcc <2 x i32> @vmulhs_v2i32(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: vmulhs_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s32 q2, q0, q1
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <2 x i32> %s0 to <2 x i64>
  %s1s = sext <2 x i32> %s1 to <2 x i64>
  %m = mul <2 x i64> %s0s, %s1s
  %s = ashr <2 x i64> %m, <i64 32, i64 32>
  %s2 = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %s2
}

define arm_aapcs_vfpcc <2 x i32> @vmulhu_v2i32(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: vmulhu_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.u32 q2, q0, q1
; CHECK-NEXT: vldr s1, .LCPI1_0
; CHECK-NEXT: vmov.f32 s0, s9
; CHECK-NEXT: vmov.f32 s2, s11
; CHECK-NEXT: vmov.f32 s3, s1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
entry:
  %s0s = zext <2 x i32> %s0 to <2 x i64>
  %s1s = zext <2 x i32> %s1 to <2 x i64>
  %m = mul <2 x i64> %s0s, %s1s
  %s = lshr <2 x i64> %m, <i64 32, i64 32>
  %s2 = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %s2
}

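; The 32-bit element cases need the full 64-bit products. The signed v4i32
; version below currently mixes vmullb.s32 with two scalar smmul multiplies,
; while the unsigned version uses two vmullb.u32 and recombines the odd
; (high) halves of each product.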
define arm_aapcs_vfpcc <4 x i32> @vmulhs_v4i32(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: vmulhs_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s12, s0
; CHECK-NEXT: vmov.f32 s10, s5
; CHECK-NEXT: vmov.f32 s14, s1
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r1, s12
; CHECK-NEXT: vmov.f32 s16, s6
; CHECK-NEXT: vmov.f32 s18, s7
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmullb.s32 q5, q1, q4
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s21
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s23
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <4 x i32> %s0 to <4 x i64>
  %s1s = sext <4 x i32> %s1 to <4 x i64>
  %m = mul <4 x i64> %s0s, %s1s
  %s = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
  %s2 = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %s2
}

define arm_aapcs_vfpcc <4 x i32> @vmulhu_v4i32(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: vmulhu_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s16, s2
; CHECK-NEXT: vmov.f32 s14, s7
; CHECK-NEXT: vmov.f32 s18, s3
; CHECK-NEXT: vmov.f32 s6, s5
; CHECK-NEXT: vmullb.u32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vmullb.u32 q3, q0, q1
; CHECK-NEXT: vmov.f32 s0, s13
; CHECK-NEXT: vmov.f32 s1, s15
; CHECK-NEXT: vmov.f32 s2, s9
; CHECK-NEXT: vmov.f32 s3, s11
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
  %s0s = zext <4 x i32> %s0 to <4 x i64>
  %s1s = zext <4 x i32> %s1 to <4 x i64>
  %m = mul <4 x i64> %s0s, %s1s
  %s = lshr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
  %s2 = trunc <4 x i64> %s to <4 x i32>
  ret <4 x i32> %s2
}

define arm_aapcs_vfpcc <4 x i16> @vmulhs_v4i16(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: vmulhs_v4i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s16 q0, q0, q1
; CHECK-NEXT: vshr.s32 q0, q0, #16
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <4 x i16> %s0 to <4 x i32>
  %s1s = sext <4 x i16> %s1 to <4 x i32>
  %m = mul <4 x i32> %s0s, %s1s
  %s = ashr <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
  %s2 = trunc <4 x i32> %s to <4 x i16>
  ret <4 x i16> %s2
}

define arm_aapcs_vfpcc <4 x i16> @vmulhu_v4i16(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-LABEL: vmulhu_v4i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.u16 q0, q0, q1
; CHECK-NEXT: vshr.u32 q0, q0, #16
; CHECK-NEXT: bx lr
entry:
  %s0s = zext <4 x i16> %s0 to <4 x i32>
  %s1s = zext <4 x i16> %s1 to <4 x i32>
  %m = mul <4 x i32> %s0s, %s1s
  %s = lshr <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
  %s2 = trunc <4 x i32> %s to <4 x i16>
  ret <4 x i16> %s2
}

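; The full-width i16 and i8 cases split the operands into bottom and top
; halves with vmullb/vmullt, shift each product vector down by the element
; width and recombine the results with vmovnt.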
define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhs_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.s16 q2, q0, q1
; CHECK-NEXT: vmullb.s16 q0, q0, q1
; CHECK-NEXT: vshr.u32 q2, q2, #16
; CHECK-NEXT: vshr.u32 q0, q0, #16
; CHECK-NEXT: vmovnt.i32 q0, q2
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <8 x i16> %s0 to <8 x i32>
  %s1s = sext <8 x i16> %s1 to <8 x i32>
  %m = mul <8 x i32> %s0s, %s1s
  %s = ashr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %s2 = trunc <8 x i32> %s to <8 x i16>
  ret <8 x i16> %s2
}

define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhu_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.u16 q2, q0, q1
; CHECK-NEXT: vmullb.u16 q0, q0, q1
; CHECK-NEXT: vshr.u32 q2, q2, #16
; CHECK-NEXT: vshr.u32 q0, q0, #16
; CHECK-NEXT: vmovnt.i32 q0, q2
; CHECK-NEXT: bx lr
entry:
  %s0s = zext <8 x i16> %s0 to <8 x i32>
  %s1s = zext <8 x i16> %s1 to <8 x i32>
  %m = mul <8 x i32> %s0s, %s1s
  %s = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %s2 = trunc <8 x i32> %s to <8 x i16>
  ret <8 x i16> %s2
}

define arm_aapcs_vfpcc <8 x i8> @vmulhs_v8i8(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: vmulhs_v8i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s8 q0, q0, q1
; CHECK-NEXT: vshr.s16 q0, q0, #8
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <8 x i8> %s0 to <8 x i16>
  %s1s = sext <8 x i8> %s1 to <8 x i16>
  %m = mul <8 x i16> %s0s, %s1s
  %s = ashr <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %s2 = trunc <8 x i16> %s to <8 x i8>
  ret <8 x i8> %s2
}

define arm_aapcs_vfpcc <8 x i8> @vmulhu_v8i8(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-LABEL: vmulhu_v8i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.u8 q0, q0, q1
; CHECK-NEXT: vshr.u16 q0, q0, #8
; CHECK-NEXT: bx lr
entry:
  %s0s = zext <8 x i8> %s0 to <8 x i16>
  %s1s = zext <8 x i8> %s1 to <8 x i16>
  %m = mul <8 x i16> %s0s, %s1s
  %s = lshr <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %s2 = trunc <8 x i16> %s to <8 x i8>
  ret <8 x i8> %s2
}

define arm_aapcs_vfpcc <16 x i8> @vmulhs_v16i8(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: vmulhs_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.s8 q2, q0, q1
; CHECK-NEXT: vmullb.s8 q0, q0, q1
; CHECK-NEXT: vshr.u16 q2, q2, #8
; CHECK-NEXT: vshr.u16 q0, q0, #8
; CHECK-NEXT: vmovnt.i16 q0, q2
; CHECK-NEXT: bx lr
entry:
  %s0s = sext <16 x i8> %s0 to <16 x i16>
  %s1s = sext <16 x i8> %s1 to <16 x i16>
  %m = mul <16 x i16> %s0s, %s1s
  %s = ashr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %s2 = trunc <16 x i16> %s to <16 x i8>
  ret <16 x i8> %s2
}

define arm_aapcs_vfpcc <16 x i8> @vmulhu_v16i8(<16 x i8> %s0, <16 x i8> %s1) {
; CHECK-LABEL: vmulhu_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.u8 q2, q0, q1
; CHECK-NEXT: vmullb.u8 q0, q0, q1
; CHECK-NEXT: vshr.u16 q2, q2, #8
; CHECK-NEXT: vshr.u16 q0, q0, #8
; CHECK-NEXT: vmovnt.i16 q0, q2
; CHECK-NEXT: bx lr
entry:
  %s0s = zext <16 x i8> %s0 to <16 x i16>
  %s1s = zext <16 x i8> %s1 to <16 x i16>
  %m = mul <16 x i16> %s0s, %s1s
  %s = lshr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %s2 = trunc <16 x i16> %s to <16 x i8>
  ret <16 x i8> %s2
}