[AArch64][SVE] Implement additional floating-point arithmetic intrinsics
Summary:
Adds intrinsics for the following:
- ftssel
- fcadd, fcmla
- fmla, fmls, fnmla, fnmls
- fmad, fmsb, fnmad, fnmsb

Reviewers: sdesmalen, huntergr, dancgr, mgudim

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69707
Parent: e03a06b348
Commit: f7848fd8f7
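
For orientation, the new intrinsics follow the usual SVE shape: a leading governing predicate, the data operands, and a trailing i32 immediate where the instruction takes one. A minimal IR usage sketch (the function name @fcmla_example is illustrative; the call itself mirrors the fcmla tests added below):

    define <vscale x 4 x float> @fcmla_example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %acc,
                                               <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
      ; complex multiply-accumulate rotated by 90 degrees;
      ; selects "fcmla z0.s, p0/m, z1.s, z2.s, #90"
      %r = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %acc,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b,
                                                                     i32 90)
      ret <vscale x 4 x float> %r
    }
    declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
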
@@ -771,6 +771,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
                  LLVMMatchType<0>],
                 [IntrNoMem]>;

+  class AdvSIMD_2VectorArgIndexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_3VectorArgIndexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_CNT_Intrinsic
     : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
                 [LLVMVectorOfBitcastsToInt<0>,
@@ -783,6 +798,32 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
                 [LLVMSubdivide2VectorType<0>],
                 [IntrNoMem]>;

+  class AdvSIMD_SVE_CADD_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_CMLA_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_CMLA_LANE_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_i32_ty,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_PUNPKHI_Intrinsic
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
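
In IR terms, these classes expand to overloaded prototypes such as the following instances, copied verbatim from the test file at the end of this patch (the nxv4f32 overload shown is just one of the legal element types):

    declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
    declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
    declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32, i32)
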
@@ -926,18 +967,34 @@ def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;

 def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
+def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
+def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
 def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
 def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
+def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;

 //
 // Floating-point comparisons
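
The new _lane intrinsics are unpredicated and take the element index as a trailing i32 immediate. A sketch mirroring the fmla_lane_s test below (@lane_example is an illustrative name):

    define <vscale x 4 x float> @lane_example(<vscale x 4 x float> %acc, <vscale x 4 x float> %a,
                                              <vscale x 4 x float> %b) {
      ; acc += a * lane 2 of b; selects "fmla z0.s, z1.s, z2.s[2]"
      %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %acc,
                                                                         <vscale x 4 x float> %a,
                                                                         <vscale x 4 x float> %b,
                                                                         i32 2)
      ret <vscale x 4 x float> %r
    }
    declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
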
@@ -758,6 +758,13 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
   let ParserMatchClass = Imm0_7Operand;
 }

+// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
+def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 8;
+}]> {
+  let ParserMatchClass = Imm0_7Operand;
+}
+
 // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
 def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint32_t)Imm) < 16;
@@ -10056,15 +10063,20 @@ class ComplexRotationOperand<int Angle, int Remainder, string Type>
   let DiagnosticType = "InvalidComplexRotation" # Type;
   let Name = "ComplexRotation" # Type;
 }
-def complexrotateop : Operand<i32> {
+def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+    SDNodeXForm<imm, [{
+      return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
+    }]>> {
   let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
   let PrintMethod = "printComplexRotationOp<90, 0>";
 }
-def complexrotateopodd : Operand<i32> {
+def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+    SDNodeXForm<imm, [{
+      return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
+    }]>> {
   let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
   let PrintMethod = "printComplexRotationOp<180, 90>";
 }

 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
                                      RegisterOperand regtype, Operand rottype,
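
The rotation operand is written in degrees in both IR and assembly; the SDNodeXForms above compress it into the instruction's rotate field (fcmla accepts 0/90/180/270, encoded as imm/90; fcadd accepts 90/270, encoded as (imm-90)/180). A sketch reusing the fcadd_s shape from the tests below (@rot_example is an illustrative name):

    define <vscale x 4 x float> @rot_example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a,
                                             <vscale x 4 x float> %b) {
      ; i32 270 encodes rotate field 1; selects "fcadd z0.s, p0/m, z0.s, z1.s, #270"
      %r = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a,
                                                                     <vscale x 4 x float> %b,
                                                                     i32 270)
      ret <vscale x 4 x float> %r
    }
    declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
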
@@ -145,28 +145,28 @@ let Predicates = [HasSVE] in {
   defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
   defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;

-  defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
+  defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;

-  defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
-  defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
+  defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
+  defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;

-  defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">;
-  defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">;
-  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
-  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
+  defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", int_aarch64_sve_fmla>;
+  defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", int_aarch64_sve_fmls>;
+  defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", int_aarch64_sve_fnmla>;
+  defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", int_aarch64_sve_fnmls>;

-  defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">;
-  defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">;
-  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
-  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
+  defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad>;
+  defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb>;
+  defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
+  defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;

-  defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
+  defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;

-  defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
-  defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
+  defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
+  defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;

-  defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
-  defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
+  defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
+  defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;

   // SVE floating point reductions.
   defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
@@ -299,7 +299,8 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
   (inst $Op1, $Op2, $Op3)>;

 class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
-                   ValueType vt2, ValueType vt3, ValueType vt4, Instruction inst>
+                   ValueType vt2, ValueType vt3, ValueType vt4,
+                   Instruction inst>
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
   (inst $Op1, $Op2, $Op3, $Op4)>;

@@ -1225,7 +1226,7 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
 }

 class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3),
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
   asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
   "",
   []>, Sched<[]> {
@@ -1245,10 +1246,17 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
   let ElementSize = ElementSizeNone;
 }

-multiclass sve_fp_ftmad<string asm> {
+multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
   def _H : sve_fp_ftmad<0b01, asm, ZPR16>;
   def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
   def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
+
+  def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
+  def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
+  def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
+            (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
 }

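
An ftmad call with an in-range immediate then selects directly to the instruction; e.g. this sketch, matching the ftmad_d test below (@ftmad_example is an illustrative name):

    define <vscale x 2 x double> @ftmad_example(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
      ; the trailing immediate must be in [0,7] (imm32_0_7); selects "ftmad z0.d, z0.d, z1.d, #7"
      %r = call <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double> %a,
                                                                        <vscale x 2 x double> %b,
                                                                        i32 7)
      ret <vscale x 2 x double> %r
    }
    declare <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
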
@@ -1323,10 +1331,14 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
   def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
   def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }

 class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
@@ -1354,10 +1366,14 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
+multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
   def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
   def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
+
+  def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }

 //===----------------------------------------------------------------------===//
@@ -1384,26 +1400,34 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
   let ElementSize = ElementSizeNone;
 }

-multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
-  def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm,
+                                      SDPatternOperator op> {
+  def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{22} = iop{2};
     let Inst{20-19} = iop{1-0};
     let Inst{18-16} = Zm;
   }
-  def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
     bits<3> Zm;
     bits<2> iop;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
     bits<4> Zm;
     bit iop;
     let Inst{20} = iop;
     let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
 }

@@ -1425,26 +1449,33 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, string asm, ZPRRegOp zprty,
   let Inst{4-0} = Zd;
 }

-multiclass sve_fp_fmul_by_indexed_elem<string asm> {
-  def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
+  def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{22} = iop{2};
     let Inst{20-19} = iop{1-0};
     let Inst{18-16} = Zm;
   }
-  def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> {
+  def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> {
     bits<3> Zm;
     bits<2> iop;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> {
+  def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> {
     bits<4> Zm;
     bit iop;
     let Inst{20} = iop;
     let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
 }

 //===----------------------------------------------------------------------===//
@@ -1476,10 +1507,17 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_fp_fcmla<string asm> {
+multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
   def _H : sve_fp_fcmla<0b01, asm, ZPR16>;
   def _S : sve_fp_fcmla<0b10, asm, ZPR32>;
   def _D : sve_fp_fcmla<0b11, asm, ZPR64>;
+
+  def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, nxv8f16:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, nxv4f32:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
+  def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, nxv2f64:$Op4, (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, $Op4, complexrotateop:$imm)>;
 }

 //===----------------------------------------------------------------------===//
@@ -1509,19 +1547,24 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
   let ElementSize = ElementSizeNone;
 }

-multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
-  def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
+  def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS32b> {
     bits<3> Zm;
     bits<2> iop;
     let Inst{20-19} = iop;
     let Inst{18-16} = Zm;
   }
-  def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> {
+  def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD32b> {
     bits<4> Zm;
     bits<1> iop;
     let Inst{20} = iop;
     let Inst{19-16} = Zm;
   }
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
 }

 //===----------------------------------------------------------------------===//
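
Note that the index operand of the indexed fcmla counts (real, imaginary) element pairs rather than single elements, which is why the _H variant uses VectorIndexS32b and the _S variant VectorIndexD32b above. A sketch matching the fcmla_lane_h test below (@fcmla_lane_example is an illustrative name):

    define <vscale x 8 x half> @fcmla_lane_example(<vscale x 8 x half> %acc, <vscale x 8 x half> %a,
                                                   <vscale x 8 x half> %b) {
      ; pair index 3, rotation 0; selects "fcmla z0.h, z1.h, z2.h[3], #0"
      %r = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %acc,
                                                                         <vscale x 8 x half> %a,
                                                                         <vscale x 8 x half> %b,
                                                                         i32 3, i32 0)
      ret <vscale x 8 x half> %r
    }
    declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32, i32)
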
@@ -1552,10 +1595,17 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_fp_fcadd<string asm> {
+multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
   def _H : sve_fp_fcadd<0b01, asm, ZPR16>;
   def _S : sve_fp_fcadd<0b10, asm, ZPR32>;
   def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
+
+  def : Pat<(nxv8f16 (op nxv8i1:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 complexrotateopodd:$imm))),
+            (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
+  def : Pat<(nxv4f32 (op nxv4i1:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 complexrotateopodd:$imm))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
+  def : Pat<(nxv2f64 (op nxv2i1:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 complexrotateopodd:$imm))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, complexrotateopodd:$imm)>;
 }

 //===----------------------------------------------------------------------===//
@@ -5646,10 +5696,14 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
   let Inst{4-0} = Zd;
 }

-multiclass sve_int_bin_cons_misc_0_b<string asm> {
+multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
   def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>;
   def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>;
   def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>;
+
+  def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

 class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
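
These last patterns mix element types: ftssel produces a floating-point result from a floating-point first operand and a same-width integer second operand. A sketch matching the ftssel_s test below (@ftssel_example is an illustrative name):

    define <vscale x 4 x float> @ftssel_example(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
      ; selects "ftssel z0.s, z0.s, z1.s"
      %r = call <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float> %a,
                                                                        <vscale x 4 x i32> %b)
      ret <vscale x 4 x float> %r
    }
    declare <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
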
@@ -68,6 +68,111 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
   ret <vscale x 2 x double> %out
 }

+;
+; FCADD
+;
+
+define <vscale x 8 x half> @fcadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcadd_h:
+; CHECK: fcadd z0.h, p0/m, z0.h, z1.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  i32 90)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fcadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcadd_s:
+; CHECK: fcadd z0.s, p0/m, z0.s, z1.s, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   i32 270)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fcadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcadd_d:
+; CHECK: fcadd z0.d, p0/m, z0.d, z1.d, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    i32 90)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FCMLA
+;
+
+define <vscale x 8 x half> @fcmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fcmla_h:
+; CHECK: fcmla z0.h, p0/m, z1.h, z2.h, #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  <vscale x 8 x half> %c,
+                                                                  i32 90)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fcmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fcmla_s:
+; CHECK: fcmla z0.s, p0/m, z1.s, z2.s, #180
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   <vscale x 4 x float> %c,
+                                                                   i32 180)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fcmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fcmla_d:
+; CHECK: fcmla z0.d, p0/m, z1.d, z2.d, #270
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    <vscale x 2 x double> %c,
+                                                                    i32 270)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FCMLA (Indexed)
+;
+
+define <vscale x 8 x half> @fcmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fcmla_lane_h:
+; CHECK: fcmla z0.h, z1.h, z2.h[3], #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %a,
+                                                                       <vscale x 8 x half> %b,
+                                                                       <vscale x 8 x half> %c,
+                                                                       i32 3,
+                                                                       i32 0)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fcmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fcmla_lane_s:
+; CHECK: fcmla z0.s, z1.s, z2.s[1], #90
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %a,
+                                                                        <vscale x 4 x float> %b,
+                                                                        <vscale x 4 x float> %c,
+                                                                        i32 1,
+                                                                        i32 90)
+  ret <vscale x 4 x float> %out
+}
+
 ;
 ; FDIV
 ;
@@ -136,6 +241,43 @@ define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
   ret <vscale x 2 x double> %out
 }

+;
+; FMAD
+;
+
+define <vscale x 8 x half> @fmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmad_h:
+; CHECK: fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %a,
+                                                                 <vscale x 8 x half> %b,
+                                                                 <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmad_s:
+; CHECK: fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %a,
+                                                                  <vscale x 4 x float> %b,
+                                                                  <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmad_d:
+; CHECK: fmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %a,
+                                                                   <vscale x 2 x double> %b,
+                                                                   <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FMAX
 ;
@@ -272,6 +414,191 @@ define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x doub
   ret <vscale x 2 x double> %out
 }

+;
+; FMLA
+;
+
+define <vscale x 8 x half> @fmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmla_h:
+; CHECK: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %a,
+                                                                 <vscale x 8 x half> %b,
+                                                                 <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmla_s:
+; CHECK: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %a,
+                                                                  <vscale x 4 x float> %b,
+                                                                  <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmla_d:
+; CHECK: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %a,
+                                                                   <vscale x 2 x double> %b,
+                                                                   <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMLA (Indexed)
+;
+
+define <vscale x 8 x half> @fmla_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmla_lane_h:
+; CHECK: fmla z0.h, z1.h, z2.h[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %a,
+                                                                      <vscale x 8 x half> %b,
+                                                                      <vscale x 8 x half> %c,
+                                                                      i32 3)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmla_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmla_lane_s:
+; CHECK: fmla z0.s, z1.s, z2.s[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %a,
+                                                                       <vscale x 4 x float> %b,
+                                                                       <vscale x 4 x float> %c,
+                                                                       i32 2)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmla_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmla_lane_d:
+; CHECK: fmla z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %a,
+                                                                        <vscale x 2 x double> %b,
+                                                                        <vscale x 2 x double> %c,
+                                                                        i32 1)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMLS
+;
+
+define <vscale x 8 x half> @fmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmls_h:
+; CHECK: fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %a,
+                                                                 <vscale x 8 x half> %b,
+                                                                 <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmls_s:
+; CHECK: fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %a,
+                                                                  <vscale x 4 x float> %b,
+                                                                  <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmls_d:
+; CHECK: fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %a,
+                                                                   <vscale x 2 x double> %b,
+                                                                   <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMLS (Indexed)
+;
+
+define <vscale x 8 x half> @fmls_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmls_lane_h:
+; CHECK: fmls z0.h, z1.h, z2.h[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> %a,
+                                                                      <vscale x 8 x half> %b,
+                                                                      <vscale x 8 x half> %c,
+                                                                      i32 3)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmls_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmls_lane_s:
+; CHECK: fmls z0.s, z1.s, z2.s[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> %a,
+                                                                       <vscale x 4 x float> %b,
+                                                                       <vscale x 4 x float> %c,
+                                                                       i32 2)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmls_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmls_lane_d:
+; CHECK: fmls z0.d, z1.d, z2.d[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> %a,
+                                                                        <vscale x 2 x double> %b,
+                                                                        <vscale x 2 x double> %c,
+                                                                        i32 1)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMSB
+;
+
+define <vscale x 8 x half> @fmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmsb_h:
+; CHECK: fmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %a,
+                                                                 <vscale x 8 x half> %b,
+                                                                 <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmsb_s:
+; CHECK: fmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %a,
+                                                                  <vscale x 4 x float> %b,
+                                                                  <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmsb_d:
+; CHECK: fmsb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %a,
+                                                                   <vscale x 2 x double> %b,
+                                                                   <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FMUL
 ;
@@ -306,6 +633,40 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
   ret <vscale x 2 x double> %out
 }

+;
+; FMUL (Indexed)
+;
+
+define <vscale x 8 x half> @fmul_lane_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmul_lane_h:
+; CHECK: fmul z0.h, z0.h, z1.h[3]
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> %a,
+                                                                      <vscale x 8 x half> %b,
+                                                                      i32 3)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmul_lane_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_lane_s:
+; CHECK: fmul z0.s, z0.s, z1.s[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> %a,
+                                                                       <vscale x 4 x float> %b,
+                                                                       i32 2)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmul_lane_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_lane_d:
+; CHECK: fmul z0.d, z0.d, z1.d[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> %a,
+                                                                        <vscale x 2 x double> %b,
+                                                                        i32 1)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FMULX
 ;
@@ -374,6 +735,154 @@ define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
   ret <vscale x 2 x double> %out
 }

+;
+; FNMAD
+;
+
+define <vscale x 8 x half> @fnmad_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fnmad_h:
+; CHECK: fnmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fnmad_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fnmad_s:
+; CHECK: fnmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fnmad_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fnmad_d:
+; CHECK: fnmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FNMLA
+;
+
+define <vscale x 8 x half> @fnmla_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fnmla_h:
+; CHECK: fnmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fnmla_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fnmla_s:
+; CHECK: fnmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fnmla_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fnmla_d:
+; CHECK: fnmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FNMLS
+;
+
+define <vscale x 8 x half> @fnmls_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fnmls_h:
+; CHECK: fnmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fnmls_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fnmls_s:
+; CHECK: fnmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fnmls_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fnmls_d:
+; CHECK: fnmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FNMSB
+;
+
+define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fnmsb_h:
+; CHECK: fnmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a,
+                                                                  <vscale x 8 x half> %b,
+                                                                  <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fnmsb_s:
+; CHECK: fnmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a,
+                                                                   <vscale x 4 x float> %b,
+                                                                   <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fnmsb_d:
+; CHECK: fnmsb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a,
+                                                                    <vscale x 2 x double> %b,
+                                                                    <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FSUB
 ;
@@ -442,6 +951,40 @@ define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
   ret <vscale x 2 x double> %out
 }

+;
+; FTMAD
+;
+
+define <vscale x 8 x half> @ftmad_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: ftmad_h:
+; CHECK: ftmad z0.h, z0.h, z1.h, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half> %a,
+                                                                    <vscale x 8 x half> %b,
+                                                                    i32 0)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @ftmad_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: ftmad_s:
+; CHECK: ftmad z0.s, z0.s, z1.s, #0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float> %a,
+                                                                     <vscale x 4 x float> %b,
+                                                                     i32 0)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @ftmad_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: ftmad_d:
+; CHECK: ftmad z0.d, z0.d, z1.d, #7
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double> %a,
+                                                                      <vscale x 2 x double> %b,
+                                                                      i32 7)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FTSMUL
 ;
@@ -473,6 +1016,37 @@ define <vscale x 2 x double> @ftsmul_d(<vscale x 2 x double> %a, <vscale x 2 x i
   ret <vscale x 2 x double> %out
 }

+;
+; FTSSEL
+;
+
+define <vscale x 8 x half> @ftssel_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ftssel_h:
+; CHECK: ftssel z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half> %a,
+                                                                     <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @ftssel_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ftssel_s:
+; CHECK: ftssel z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float> %a,
+                                                                      <vscale x 4 x i32> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @ftssel_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ftssel_d:
+; CHECK: ftssel z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double> %a,
+                                                                       <vscale x 2 x i64> %b)
+  ret <vscale x 2 x double> %out
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -481,6 +1055,17 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <v
 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32, i32)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -489,6 +1074,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1>, <
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -505,14 +1094,54 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>,
 declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x i16>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x i32>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x i64>)
@@ -525,6 +1154,14 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

+declare <vscale x 8 x half> @llvm.aarch64.sve.ftmad.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ftmad.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ftmad.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.ftssel.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ftssel.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ftssel.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)