[AArch64][SVE] Add support for fcmp.

This also requires support for boolean "not", so I added boolean logic
while I was there.

Differential Revision: https://reviews.llvm.org/D76901
Eli Friedman 2020-03-26 19:20:59 -07:00
parent e8f13f4f62
commit dacf8d3562
5 changed files with 463 additions and 48 deletions
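
For context, here is a minimal LLVM IR sketch of why fcmp support drags in predicate-level boolean logic. It is illustrative only and not taken from the patch's tests: an ordered compare such as "fcmp ord" is the negation of "fcmp uno", and negating a <vscale x N x i1> value is an xor with an all-true splat, which is the kind of unpredicated and/eor/orr operation the new patterns make selectable.

define <vscale x 4 x i1> @ord_via_not_uno(<vscale x 4 x float> %x, <vscale x 4 x float> %y) {
  ; Unordered compare: a lane is true if either input lane is NaN.
  %uno = fcmp uno <vscale x 4 x float> %x, %y
  ; Build an all-true predicate by splatting the constant i1 true.
  %ins = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
  %ones = shufflevector <vscale x 4 x i1> %ins, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  ; "ordered" is the logical not of "unordered": xor with the all-true splat.
  %ord = xor <vscale x 4 x i1> %uno, %ones
  ret <vscale x 4 x i1> %ord
}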

@@ -198,6 +198,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
}
// Compute derived properties from the register classes
@@ -7544,9 +7558,15 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
// TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
// TODO: Add special cases for splat of constant true/false.
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
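
As an illustration of the constant-true special case above, a hypothetical IR example (not one of the patch's tests): with this change, a splat of the constant i1 true should lower to a single ptrue instead of the whilelo sequence.

define <vscale x 4 x i1> @splat_true() {
  ; Splat the constant i1 true across a scalable predicate vector.
  %ins = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
  %splat = shufflevector <vscale x 4 x i1> %ins, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i1> %splat
}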

@@ -443,14 +443,14 @@ let Predicates = [HasSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z>;
defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
@@ -980,11 +980,11 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

@@ -392,6 +392,11 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst, Instruction ptrue>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst (ptrue 31), $Op1, $Op2)>;
//
// Pseudo -> Instruction mappings
//
@@ -1358,13 +1363,22 @@ class sve_int_pred_log<bits<4> opc, string asm>
}
multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op> {
multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
SDPatternOperator op_nopred = null_frag> {
def NAME : sve_int_pred_log<opc, asm>;
def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
!cast<Instruction>(NAME), PTRUE_B>;
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
!cast<Instruction>(NAME), PTRUE_H>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,
!cast<Instruction>(NAME), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
!cast<Instruction>(NAME), PTRUE_D>;
}
@@ -4518,6 +4532,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
SDPatternOperator op_nopred>
: sve_fp_3op_p_pd<opc, asm, op> {
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
!cast<Instruction>(NAME # _H), PTRUE_H>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
!cast<Instruction>(NAME # _H), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
!cast<Instruction>(NAME # _H), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
!cast<Instruction>(NAME # _S), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
!cast<Instruction>(NAME # _S), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
!cast<Instruction>(NAME # _D), PTRUE_D>;
}
//===----------------------------------------------------------------------===//
// SVE Floating Point Compare - with Zero Group

@@ -0,0 +1,231 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 4 x i1> @oeq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: oeq:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ogt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp ogt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @oge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: oge:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oge <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @olt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%y = fcmp olt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ole(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ole:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%y = fcmp ole <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @one(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: one:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp one <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ord(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ord:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ord <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ueq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ueq:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ugt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ugt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @uge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: uge:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp uge <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ult(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ult:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ult <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ule(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ule:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ule <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @une(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: une:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp une <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @uno(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: uno:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp uno <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
; CHECK-LABEL: oeq_2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x float> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
; CHECK-LABEL: ueq_2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x float> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
; CHECK-LABEL: oeq_2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x double> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
; CHECK-LABEL: ueq_2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x double> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
; CHECK-LABEL: oeq_2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x half> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
; CHECK-LABEL: ueq_2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x half> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 4 x i1> @oeq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
; CHECK-LABEL: oeq_4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 4 x half> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ueq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
; CHECK-LABEL: ueq_4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x half> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 8 x i1> @oeq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
; CHECK-LABEL: oeq_8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 8 x half> %x, %x2
ret <vscale x 8 x i1> %y
}
define <vscale x 8 x i1> @ueq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
; CHECK-LABEL: ueq_8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 8 x half> %x, %x2
ret <vscale x 8 x i1> %y
}

@@ -1,96 +1,230 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 2 x i64> @and_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: and_d
; CHECK: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: and_d:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = and <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %res
}
define <vscale x 4 x i32> @and_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: and_s
; CHECK: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: and_s:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = and <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %res
}
define <vscale x 8 x i16> @and_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: and_h
; CHECK: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: and_h:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = and <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %res
}
define <vscale x 16 x i8> @and_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: and_b
; CHECK: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: and_b:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = and <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %res
}
define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: and_pred_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: and_pred_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: and_pred_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @and_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: and_pred_b:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res
}
define <vscale x 2 x i64> @or_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: or_d
; CHECK: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: or_d:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = or <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %res
}
define <vscale x 4 x i32> @or_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: or_s
; CHECK: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: or_s:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = or <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %res
}
define <vscale x 8 x i16> @or_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: or_h
; CHECK: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: or_h:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = or <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %res
}
define <vscale x 16 x i8> @or_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: or_b
; CHECK: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: or_b:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = or <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %res
}
define <vscale x 2 x i1> @or_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: or_pred_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = or <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @or_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: or_pred_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = or <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @or_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: or_pred_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = or <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @or_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: or_pred_b:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = or <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res
}
define <vscale x 2 x i64> @xor_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xor_d
; CHECK: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: xor_d:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = xor <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %res
}
define <vscale x 4 x i32> @xor_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xor_s
; CHECK: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: xor_s:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = xor <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %res
}
define <vscale x 8 x i16> @xor_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: xor_h
; CHECK: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: xor_h:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = xor <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %res
}
define <vscale x 16 x i8> @xor_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xor_b
; CHECK: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
; CHECK-LABEL: xor_b:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = xor <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %res
}
define <vscale x 2 x i1> @xor_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: xor_pred_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = xor <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @xor_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: xor_pred_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = xor <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @xor_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: xor_pred_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = xor <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @xor_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: xor_pred_b:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = xor <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res
}