[AArch64][SVE] Implement additional floating-point arithmetic intrinsics

Summary:
Adds intrinsics for the following:
  - ftssel
  - fcadd, fcmla
  - fmla, fmls, fnmla, fnmls
  - fmad, fmsb, fnmad, fnmsb

Reviewers: sdesmalen, huntergr, dancgr, mgudim

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69707
Author: Kerry McLaughlin
Date:   2019-11-14 11:35:38 +00:00
Parent: e03a06b348
Commit: f7848fd8f7

5 changed files with 798 additions and 38 deletions
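
For illustration only (not part of the patch): each predicated intrinsic takes the
governing predicate as its first operand, mirroring the p0/m operand of the
instruction it selects to. A minimal sketch of calling the new fmla intrinsic from
LLVM IR, using the signature declared in the test file below (the function name is
just an example):

  define <vscale x 4 x float> @fmla_example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a,
                                            <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
    %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b,
                                                                    <vscale x 4 x float> %c)
    ret <vscale x 4 x float> %out
  }

  declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)

With the patterns added below this selects to "fmla z0.s, p0/m, z1.s, z2.s" (see the fmla_s test).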


@@ -771,6 +771,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArgIndexed_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_3VectorArgIndexed_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_CNT_Intrinsic
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>,
@@ -783,6 +798,32 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
class AdvSIMD_SVE_CADD_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_CMLA_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_CMLA_LANE_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty,
llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_SVE_PUNPKHI_Intrinsic
: Intrinsic<[LLVMHalfElementsVectorType<0>],
[llvm_anyvector_ty],
@@ -926,18 +967,34 @@ def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;
//
// Floating-point comparisons


@@ -758,6 +758,13 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_7Operand;
}
// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 8;
}]> {
let ParserMatchClass = Imm0_7Operand;
}
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
@@ -10056,15 +10063,20 @@ class ComplexRotationOperand<int Angle, int Remainder, string Type>
let DiagnosticType = "InvalidComplexRotation" # Type;
let Name = "ComplexRotation" # Type;
}
def complexrotateop : Operand<i32> {
def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
let PrintMethod = "printComplexRotationOp<90, 0>";
}
def complexrotateopodd : Operand<i32> {
def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
let PrintMethod = "printComplexRotationOp<180, 90>";
}
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
RegisterOperand regtype, Operand rottype,


@@ -145,28 +145,28 @@ let Predicates = [HasSVE] in {
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">;
defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">;
defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", int_aarch64_sve_fmla>;
defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", int_aarch64_sve_fmls>;
defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>;
defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>;
defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">;
defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">;
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad>;
defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb>;
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;


@@ -299,7 +299,8 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
(inst $Op1, $Op2, $Op3)>;
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4, Instruction inst>
ValueType vt2, ValueType vt3, ValueType vt4,
Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
(inst $Op1, $Op2, $Op3, $Op4)>;
@@ -1225,7 +1226,7 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3),
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
"",
[]>, Sched<[]> {
@@ -1245,10 +1246,17 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = ElementSizeNone;
}
multiclass sve_fp_ftmad<string asm> {
multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
def _H : sve_fp_ftmad<0b01, asm, ZPR16>;
def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
(!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
(!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
(!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
}
@@ -1323,10 +1331,14 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
@@ -1354,10 +1366,14 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1384,26 +1400,34 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> {
multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm,
SDPatternOperator op> {
def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> {
def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> {
def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
(!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
(!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
(!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
}
@@ -1425,26 +1449,33 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
multiclass sve_fp_fmul_by_indexed_elem<string asm> {
def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> {
multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> {
def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> {
def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
(!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
(!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
(!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
}
//===----------------------------------------------------------------------===//
@@ -1476,10 +1507,17 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcmla<string asm> {
multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
def _H : sve_fp_fcmla<0b01, asm, ZPR16>;
def _S : sve_fp_fcmla<0b10, asm, ZPR32>;
def _D : sve_fp_fcmla<0b11, asm, ZPR64>;
def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))),
(!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))),
(!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))),
(!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -1509,19 +1547,24 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let ElementSize = ElementSizeNone;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> {
multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> {
def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> {
bits<4> Zm;
bits<1> iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
(!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
(!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -1552,10 +1595,17 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_fcadd<string asm> {
multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
def _H : sve_fp_fcadd<0b01, asm, ZPR16>;
def _S : sve_fp_fcadd<0b10, asm, ZPR32>;
def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))),
(!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))),
(!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))),
(!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -5646,10 +5696,14 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
multiclass sve_int_bin_cons_misc_0_b<string asm> {
multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>;
def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>;
def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>;
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>


@@ -68,6 +68,111 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
ret <vscale x 2 x double> %out
}
;
; FCADD
;
define <vscale x 8 x half> @fcadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcadd_h:
; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
i32 90)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fcadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcadd_s:
; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
i32 270)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fcadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcadd_d:
; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
i32 90)
ret <vscale x 2 x double> %out
}
;
; FCMLA
;
define <vscale x 8 x half> @fcmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fcmla_h:
; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c,
i32 90)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fcmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fcmla_s:
; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c,
i32 180)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fcmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fcmla_d:
; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c,
i32 270)
ret <vscale x 2 x double> %out
}
;
; FCMLA (Indexed)
;
define <vscale x 8 x half> @fcmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fcmla_lane_h:
; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c,
i32 3,
i32 0)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fcmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fcmla_lane_s:
; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c,
i32 1,
i32 90)
ret <vscale x 4 x float> %out
}
;
; FDIV
;
@@ -136,6 +241,43 @@ define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
ret <vscale x 2 x double> %out
}
;
; FMAD
;
define <vscale x 8 x half> @fmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmad_h:
; CHECK: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmad_s:
; CHECK: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmad_d:
; CHECK: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FMAX
;
@@ -272,6 +414,191 @@ define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
ret <vscale x 2 x double> %out
}
;
; FMLA
;
define <vscale x 8 x half> @fmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmla_h:
; CHECK: fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmla_s:
; CHECK: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmla_d:
; CHECK: fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FMLA (Indexed)
;
define <vscale x 8 x half> @fmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmla_lane_h:
; CHECK: fmla z0.h, z1.h, z2.h[3]
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c,
i32 3)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmla_lane_s:
; CHECK: fmla z0.s, z1.s, z2.s[2]
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c,
i32 2)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmla_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmla_lane_d:
; CHECK: fmla z0.d, z1.d, z2.d[1]
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c,
i32 1)
ret <vscale x 2 x double> %out
}
;
; FMLS
;
define <vscale x 8 x half> @fmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmls_h:
; CHECK: fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmls_s:
; CHECK: fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmls_d:
; CHECK: fmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FMLS (Indexed)
;
define <vscale x 8 x half> @fmls_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmls_lane_h:
; CHECK: fmls z0.h, z1.h, z2.h[3]
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c,
i32 3)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmls_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmls_lane_s:
; CHECK: fmls z0.s, z1.s, z2.s[2]
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c,
i32 2)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmls_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmls_lane_d:
; CHECK: fmls z0.d, z1.d, z2.d[1]
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c,
i32 1)
ret <vscale x 2 x double> %out
}
;
; FMSB
;
define <vscale x 8 x half> @fmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmsb_h:
; CHECK: fmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmsb_s:
; CHECK: fmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmsb_d:
; CHECK: fmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FMUL
;
@@ -306,6 +633,40 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
ret <vscale x 2 x double> %out
}
;
; FMUL (Indexed)
;
define <vscale x 8 x half> @fmul_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_lane_h:
; CHECK: fmul z0.h, z0.h, z1.h[3]
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
i32 3)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fmul_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_lane_s:
; CHECK: fmul z0.s, z0.s, z1.s[2]
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
i32 2)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmul_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_lane_d:
; CHECK: fmul z0.d, z0.d, z1.d[1]
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
i32 1)
ret <vscale x 2 x double> %out
}
;
; FMULX
;
@@ -374,6 +735,154 @@ define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
ret <vscale x 2 x double> %out
}
;
; FNMAD
;
define <vscale x 8 x half> @fnmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fnmad_h:
; CHECK: fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fnmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fnmad_s:
; CHECK: fnmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fnmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fnmad_d:
; CHECK: fnmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FNMLA
;
define <vscale x 8 x half> @fnmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fnmla_h:
; CHECK: fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fnmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fnmla_s:
; CHECK: fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fnmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fnmla_d:
; CHECK: fnmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FNMLS
;
define <vscale x 8 x half> @fnmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fnmls_h:
; CHECK: fnmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fnmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fnmls_s:
; CHECK: fnmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fnmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fnmls_d:
; CHECK: fnmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FNMSB
;
define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fnmsb_h:
; CHECK: fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> %pg,
<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
<vscale x 8 x half> %c)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fnmsb_s:
; CHECK: fnmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
<vscale x 4 x float> %c)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fnmsb_d:
; CHECK: fnmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
<vscale x 2 x double> %c)
ret <vscale x 2 x double> %out
}
;
; FSUB
;
@@ -442,6 +951,40 @@ define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
ret <vscale x 2 x double> %out
}
;
; FTMAD
;
define <vscale x 8 x half> @ftmad_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: ftmad_h:
; CHECK: ftmad z0.h, z0.h, z1.h, #0
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x half> %b,
i32 0)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @ftmad_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: ftmad_s:
; CHECK: ftmad z0.s, z0.s, z1.s, #0
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x float> %b,
i32 0)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @ftmad_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: ftmad_d:
; CHECK: ftmad z0.d, z0.d, z1.d, #7
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x double> %b,
i32 7)
ret <vscale x 2 x double> %out
}
;
; FTSMUL
;
@@ -473,6 +1016,37 @@ define <vscale x 2 x double> @ftsmul_d(<vscale x 2 x double> %a, <vscale x 2 x i
ret <vscale x 2 x double> %out
}
;
; FTSSEL
;
define <vscale x 8 x half> @ftssel_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ftssel_h:
; CHECK: ftssel z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @ftssel_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ftssel_s:
; CHECK: ftssel z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @ftssel_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ftssel_d:
; CHECK: ftssel z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x double> %out
}
declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -481,6 +1055,17 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <v
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -489,6 +1074,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1>, <
declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -505,14 +1094,54 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>,
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x i64>)
@@ -525,6 +1154,14 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <
declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)