; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
; Both lanes compared eq-zero: the AND of the two masks folds to one vcmp on (a|b).
define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpeqz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q1
; CHECK-NEXT:    vcmp.i32 eq, q2, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp eq <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with ne-zero mask on %b should form a VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpnez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 ne, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ne <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with signed-lt-zero mask on %b via a VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsltz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 lt, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp slt <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with signed-gt-zero mask on %b via a VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgtz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 gt, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sgt <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with signed-le-zero mask on %b via a VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpslez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 le, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sle <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with signed-ge-zero mask on %b via a VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 ge, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sge <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; unsigned-lt-zero is always false, so the whole mask folds away and %b is returned.
define arm_aapcs_vfpcc <4 x i32> @cmpultz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpultz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ult <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; unsigned-gt-zero is equivalent to ne-zero, so a vcmpt.i32 ne is expected in the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpugtz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 ne, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ugt <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; eq-zero mask on %a ANDed with unsigned-le-zero on %b (expected output per autogenerated checks).
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpulez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 cs, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ule <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; unsigned-ge-zero is always true, so only the eq-zero compare of %a remains.
define arm_aapcs_vfpcc <4 x i32> @cmpugez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpugez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp uge <4 x i32> %b, zeroinitializer
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
; Vector/vector eq compare predicated inside the VPT block formed by the eq-zero compare of %a.
define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpeq_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 eq, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp eq <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/vector ne compare predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpne_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 ne, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ne <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; slt b,c is emitted as the operand-swapped gt c,b inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpslt_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 gt, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp slt <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Signed gt compare predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsgt_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 gt, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sgt <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; sle b,c is emitted as the operand-swapped ge c,b inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsle_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 ge, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sle <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Signed ge compare predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsge_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 ge, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp sge <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; ult b,c is emitted as the operand-swapped unsigned hi c,b inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpult_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpult_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 hi, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ult <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Unsigned hi compare predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpugt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpugt_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 hi, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ugt <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; ule b,c is emitted as the operand-swapped unsigned cs c,b inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpule_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpule_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 cs, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp ule <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Unsigned cs (>=) compare predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpuge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpuge_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 cs, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp uge <4 x i32> %b, %c
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
|
2019-07-25 01:08:09 +08:00
|
|
|
; Vector/scalar eq compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpeqr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpeqr_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 eq, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp eq <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar ne compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpner_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpner_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 ne, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp ne <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar signed lt compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsltr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpsltr_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 lt, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp slt <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar signed gt compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsgtr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpsgtr_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 gt, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp sgt <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar signed le compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsler_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpsler_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 le, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp sle <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar signed ge compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpsger_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpsger_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.s32 ge, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp sge <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; ult against a splat has no direct vector/scalar form; the splat is materialised
; with vdup and the swapped hi compare is used inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpultr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpultr_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q2, r0
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 hi, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp ult <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar unsigned hi compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpugtr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpugtr_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 hi, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp ugt <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; ule against a splat needs the splat materialised with vdup and the
; swapped cs compare inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpuler_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpuler_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q2, r0
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 cs, q2, q1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp ule <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
|
|
|
; Vector/scalar unsigned cs compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <4 x i32> @cmpuger_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpuger_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.u32 cs, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %c, i32 0
  %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %c2 = icmp uge <4 x i32> %b, %sp
  %o = and <4 x i1> %c1, %c2
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}
|
|
|
|
|
2019-07-24 22:17:54 +08:00
|
|
|
|
|
|
|
|
|
|
|
; 8 x i16 variant: eq-zero of both operands folds to one vcmp on (a|b).
define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: cmpeqz_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q1
; CHECK-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %a, zeroinitializer
  %c2 = icmp eq <8 x i16> %b, zeroinitializer
  %o = and <8 x i1> %c1, %c2
  %s = select <8 x i1> %o, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
|
|
|
|
|
|
|
|
; 8 x i16 variant: vector/vector eq compare predicated inside the VPT block.
define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: cmpeq_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vcmpt.i16 eq, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %a, zeroinitializer
  %c2 = icmp eq <8 x i16> %b, %c
  %o = and <8 x i1> %c1, %c2
  %s = select <8 x i1> %o, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
|
|
|
|
|
2019-07-25 01:08:09 +08:00
|
|
|
; 8 x i16 variant: vector/scalar eq compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <8 x i16> @cmpeqr_v8i1(<8 x i16> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: cmpeqr_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vcmpt.i16 eq, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %a, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %c, i32 0
  %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %c2 = icmp eq <8 x i16> %b, %sp
  %o = and <8 x i1> %c1, %c2
  %s = select <8 x i1> %o, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}
|
|
|
|
|
2019-07-24 22:17:54 +08:00
|
|
|
|
|
|
|
; 16 x i8 variant: eq-zero of both operands folds to one vcmp on (a|b).
define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: cmpeqz_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q1
; CHECK-NEXT:    vcmp.i8 eq, q2, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %a, zeroinitializer
  %c2 = icmp eq <16 x i8> %b, zeroinitializer
  %o = and <16 x i1> %c1, %c2
  %s = select <16 x i1> %o, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
|
|
|
|
|
|
|
|
; 16 x i8 variant: vector/vector eq compare predicated inside the VPT block.
define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: cmpeq_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vcmpt.i8 eq, q1, q2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %a, zeroinitializer
  %c2 = icmp eq <16 x i8> %b, %c
  %o = and <16 x i1> %c1, %c2
  %s = select <16 x i1> %o, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
|
|
|
|
|
2019-07-25 01:08:09 +08:00
|
|
|
; 16 x i8 variant: vector/scalar eq compare (splat of %c) predicated inside the VPT block.
define arm_aapcs_vfpcc <16 x i8> @cmpeqr_v16i1(<16 x i8> %a, <16 x i8> %b, i8 %c) {
; CHECK-LABEL: cmpeqr_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vcmpt.i8 eq, q1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %a, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %c, i32 0
  %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %c2 = icmp eq <16 x i8> %b, %sp
  %o = and <16 x i1> %c1, %c2
  %s = select <16 x i1> %o, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}
|
|
|
|
|
2019-07-24 22:17:54 +08:00
|
|
|
|
|
|
|
; 2 x i64 has no native MVE compare: each 64-bit lane is OR-reduced to a scalar,
; tested for zero with cset/csetm, and the mask rebuilt with dual-lane vmovs.
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q1
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    vmov r1, s8
; CHECK-NEXT:    vmov r2, s10
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s11
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    tst.w r0, #1
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    cset r1, eq
; CHECK-NEXT:    tst.w r1, #1
; CHECK-NEXT:    csetm r1, ne
; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
; CHECK-NEXT:    vmov q2[3], q2[1], r1, r0
; CHECK-NEXT:    vbic q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %a, zeroinitializer
  %c2 = icmp eq <2 x i64> %b, zeroinitializer
  %o = and <2 x i1> %c1, %c2
  %s = select <2 x i1> %o, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}
|
|
|
|
|
|
|
|
; and(icmp eq %a, 0, icmp eq %b, %c) feeding a select of <2 x i64>.
; MVE has no native v2i1/i64 vector compare, so each 64-bit lane is compared
; in GPRs (eors/orrs + cset/csetm) and the lane masks are rebuilt with dual
; lane vmovs before the vbic/vand/vorr select expansion.
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeq_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s8
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s7
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s11
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    tst.w r0, #1
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s10
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    cset r1, eq
; CHECK-NEXT:    tst.w r1, #1
; CHECK-NEXT:    csetm r1, ne
; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
; CHECK-NEXT:    vmov q2[3], q2[1], r1, r0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    tst.w r0, #1
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    cset r1, eq
; CHECK-NEXT:    tst.w r1, #1
; CHECK-NEXT:    csetm r1, ne
; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
; CHECK-NEXT:    vand q2, q3, q2
; CHECK-NEXT:    vbic q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %a, zeroinitializer
  %c2 = icmp eq <2 x i64> %b, %c
  %o = and <2 x i1> %c1, %c2
  %s = select <2 x i1> %o, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}
; Same as cmpeq_v2i1 but the second compare is against a splatted scalar i64
; %c (insertelement + shufflevector). The splat operand arrives in r0/r1 per
; the AAPCS, so the first lane compare can eor directly against the argument
; registers instead of extracting from a q register.
define arm_aapcs_vfpcc <2 x i64> @cmpeqr_v2i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
; CHECK-LABEL: cmpeqr_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    eors r2, r1
; CHECK-NEXT:    eors r3, r0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, s7
; CHECK-NEXT:    cset r2, eq
; CHECK-NEXT:    tst.w r2, #1
; CHECK-NEXT:    csetm r2, ne
; CHECK-NEXT:    eors r1, r3
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    eors r0, r3
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    tst.w r0, #1
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r2
; CHECK-NEXT:    vmov q2[3], q2[1], r0, r2
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    tst.w r0, #1
; CHECK-NEXT:    csetm r0, ne
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    cset r1, eq
; CHECK-NEXT:    tst.w r1, #1
; CHECK-NEXT:    csetm r1, ne
; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
; CHECK-NEXT:    vand q2, q3, q2
; CHECK-NEXT:    vbic q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %a, zeroinitializer
  %i = insertelement <2 x i64> undef, i64 %c, i32 0
  %sp = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
  %c2 = icmp eq <2 x i64> %b, %sp
  %o = and <2 x i1> %c1, %c2
  %s = select <2 x i1> %o, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}
|