llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

366 lines
12 KiB
LLVM
Raw Normal View History

[ARM,MVE] Support immediate vbicq,vorrq,vmvnq intrinsics. Summary: Immediate vmvnq is code-generated as a simple vector constant in IR, and left to the backend to recognize that it can be created with an MVE VMVN instruction. The predicated version is represented as a select between the input and the same constant, and I've added a Tablegen isel rule to turn that into a predicated VMVN. (That should be better than the previous VMVN + VPSEL: it's the same number of instructions but now it can fold into an adjacent VPT block.) The unpredicated forms of VBIC and VORR are done by enabling the same isel lowering as for NEON, recognizing appropriate immediates and rewriting them as ARMISD::VBICIMM / ARMISD::VORRIMM SDNodes, which I then instruction-select into the right MVE instructions (now that I've also reworked those instructions to use the same MC operand encoding). In order to do that, I had to promote the Tablegen SDNode instance `NEONvorrImm` to a general `ARMvorrImm` available in MVE as well, and similarly for `NEONvbicImm`. The predicated forms of VBIC and VORR are represented as a vector select between the original input vector and the output of the unpredicated operation. The main convenience of this is that it still lets me use the existing isel lowering for VBICIMM/VORRIMM, and not have to write another copy of the operand encoding translation code. This intrinsic family is the first to use the `imm_simd` system I put into the MveEmitter tablegen backend. So, naturally, it showed up a bug or two (emitting bogus range checks and the like). Fixed those, and added a full set of tests for the permissible immediates in the existing Sema test. Also adjusted the isel pattern for `vmovlb.u8`, which stopped matching because lowering started turning its input into a VBICIMM. Now it recognizes the VBICIMM instead. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D72934
2020-01-23 19:53:42 +08:00
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
; AND-ing every i16 lane with -101 (the complement of 0x64) clears exactly the
; 0x64 bits, so a single immediate-form VBIC is expected.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) {
; CHECK-LABEL: test_vbicq_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %cleared = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
  ret <8 x i16> %cleared
}
; Same as the sh0 variant but with the byte in the upper half of each lane:
; -25601 is the complement of 0x6400 (0x64 shifted left by 8).
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) {
; CHECK-LABEL: test_vbicq_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %cleared = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
  ret <8 x i16> %cleared
}
; 32-bit lanes, immediate byte in bits 0-7: -101 is the complement of 0x64, so
; the AND should become an immediate-form VBIC.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
  ret <4 x i32> %cleared
}
; 32-bit lanes, immediate byte in bits 8-15: -25601 is the complement of
; 0x6400.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
  ret <4 x i32> %cleared
}
; 32-bit lanes, immediate byte in bits 16-23: -6553601 is the complement of
; 0x640000.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
  ret <4 x i32> %cleared
}
; 32-bit lanes, immediate byte in bits 24-31: -1677721601 is the complement of
; 0x64000000.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
  ret <4 x i32> %cleared
}
; Negative case: -21760 is the complement of 0x54ff, an immediate that is
; legal for VMVN but not for VBIC. The mask is therefore expected to be
; materialised in a second register with VMVN and applied with a plain VAND.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_illegal(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_illegal:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q1, #0x54ff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %masked = and <4 x i32> %a, <i32 -21760, i32 -21760, i32 -21760, i32 -21760>
  ret <4 x i32> %masked
}
; OR-ing every i16 lane with 100 (0x64) should select as a single
; immediate-form VORR.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh0(<8 x i16> %a) {
; CHECK-LABEL: test_vorrq_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %merged = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
  ret <8 x i16> %merged
}
; 16-bit lanes, immediate byte in the upper half: 25600 is 0x6400.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) {
; CHECK-LABEL: test_vorrq_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %merged = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
  ret <8 x i16> %merged
}
; 32-bit lanes, immediate byte in bits 0-7: OR with 100 (0x64).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
  ret <4 x i32> %merged
}
; 32-bit lanes, immediate byte in bits 8-15: OR with 25600 (0x6400).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
  ret <4 x i32> %merged
}
; 32-bit lanes, immediate byte in bits 16-23: OR with 6553600 (0x640000).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
  ret <4 x i32> %merged
}
; 32-bit lanes, immediate byte in bits 24-31: OR with 1677721600 (0x64000000).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
  ret <4 x i32> %merged
}
; Predicated VBIC on i16 lanes. %p is widened to i32 and converted to an
; <8 x i1> mask; the select takes the AND result (bits 0x64 cleared) where the
; mask is set and the untouched %a elsewhere. Expected output is a VPST block
; containing one predicated immediate VBIC.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %cleared = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
  %result = select <8 x i1> %mask, <8 x i16> %cleared, <8 x i16> %a
  ret <8 x i16> %result
}
; Predicated VBIC on i16 lanes with the immediate byte in the upper half
; (-25601 is the complement of 0x6400). Lanes whose mask bit is clear keep %a.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %cleared = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
  %result = select <8 x i1> %mask, <8 x i16> %cleared, <8 x i16> %a
  ret <8 x i16> %result
}
; Predicated VBIC on i32 lanes, immediate byte in bits 0-7 (-101 is the
; complement of 0x64). The <4 x i1> mask built from %p picks the AND result;
; unselected lanes keep %a.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
  %result = select <4 x i1> %mask, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VBIC on i32 lanes, immediate byte in bits 8-15 (-25601 is the
; complement of 0x6400).
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
  %result = select <4 x i1> %mask, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VBIC on i32 lanes, immediate byte in bits 16-23 (-6553601 is the
; complement of 0x640000).
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
  %result = select <4 x i1> %mask, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VBIC on i32 lanes, immediate byte in bits 24-31 (-1677721601 is
; the complement of 0x64000000).
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
  %result = select <4 x i1> %mask, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VORR on i16 lanes. %p is widened to i32 and converted to an
; <8 x i1> mask; the select takes the OR-with-0x64 result where the mask is
; set and the untouched %a elsewhere. Expected output is a VPST block
; containing one predicated immediate VORR.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %merged = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
  %result = select <8 x i1> %mask, <8 x i16> %merged, <8 x i16> %a
  ret <8 x i16> %result
}
; Predicated VORR on i16 lanes with the immediate byte in the upper half
; (25600 is 0x6400). Lanes whose mask bit is clear keep %a.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %merged = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
  %result = select <8 x i1> %mask, <8 x i16> %merged, <8 x i16> %a
  ret <8 x i16> %result
}
; Predicated VORR on i32 lanes, immediate byte in bits 0-7 (100 is 0x64). The
; <4 x i1> mask built from %p picks the OR result; unselected lanes keep %a.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
  %result = select <4 x i1> %mask, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VORR on i32 lanes, immediate byte in bits 8-15 (25600 is 0x6400).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
  %result = select <4 x i1> %mask, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VORR on i32 lanes, immediate byte in bits 16-23 (6553600 is
; 0x640000).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
  %result = select <4 x i1> %mask, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %result
}
; Predicated VORR on i32 lanes, immediate byte in bits 24-31 (1677721600 is
; 0x64000000).
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
  %result = select <4 x i1> %mask, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %result
}
; Returning a splat of 0x55ff, the 16-bit complement of 0xaa00, should be
; materialised with a single immediate-form VMVN.
; The lane value is spelled 22015 (0x55ff) because it must fit in i16; the
; out-of-range spelling -43521 only denotes the same bits after truncation to
; 16 bits.
define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() {
; CHECK-LABEL: test_vmvnq_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  ret <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>
}
; Returning a splat of -43521, the 32-bit complement of 0xaa00, should be
; materialised with a single immediate-form VMVN.
define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() {
; CHECK-LABEL: test_vmvnq_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
ret <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>
}
; Predicated VMVN on i16 lanes: the select takes the constant 0x55ff (the
; 16-bit complement of 0xaa00) where the mask built from %p is set and
; %inactive elsewhere. Expected output is a VPST block containing one
; predicated immediate VMVN.
; The lane value is spelled 22015 (0x55ff) because it must fit in i16; the
; out-of-range spelling -43521 only denotes the same bits after truncation to
; 16 bits.
define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vmvnq_m_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmvnt.i16 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = select <8 x i1> %1, <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>, <8 x i16> %inactive
  ret <8 x i16> %2
}
; Predicated VMVN on i32 lanes: the select takes the constant -43521 (the
; complement of 0xaa00) where the <4 x i1> mask built from %p is set and
; %inactive elsewhere.
define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vmvnq_m_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmvnt.i32 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %result = select <4 x i1> %mask, <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>, <4 x i32> %inactive
  ret <4 x i32> %result
}
; Intrinsics called by the predicated tests in this file: each takes the
; zero-extended i32 form of the i16 predicate argument and returns the i1
; vector (8 or 4 lanes) used as the select condition.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)