[AArch64][SME] Add SME outer product intrinsics
This patch adds the following intrinsics to support the SME ACLE:

* @llvm.aarch64.sme.mopa: Non-widening outer product + accumulate
* @llvm.aarch64.sme.mops: Non-widening outer product + subtract
* @llvm.aarch64.sme.mopa.wide: Widening outer product + accumulate
* @llvm.aarch64.sme.mops.wide: Widening outer product + subtract
* @llvm.aarch64.sme.smopa.wide: Widening signed sum of outer product + accumulate
* @llvm.aarch64.sme.smops.wide: Widening signed sum of outer product + subtract
* @llvm.aarch64.sme.umopa.wide: Widening unsigned sum of outer product + accumulate
* @llvm.aarch64.sme.umops.wide: Widening unsigned sum of outer product + subtract
* @llvm.aarch64.sme.sumopa.wide: Widening signed by unsigned sum of outer product + accumulate
* @llvm.aarch64.sme.sumops.wide: Widening signed by unsigned sum of outer product + subtract
* @llvm.aarch64.sme.usmopa.wide: Widening unsigned by signed sum of outer product + accumulate
* @llvm.aarch64.sme.usmops.wide: Widening unsigned by signed sum of outer product + subtract

Differential Revision: https://reviews.llvm.org/D127956
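As a usage sketch, a non-widening accumulate into ZA tile 0 looks like this at the IR level; the leading i64 operand is the virtual tile number, followed by the row and column predicates and the two multiplicands. The function name @acc_outer_product is illustrative; the intrinsic signature and expected codegen are taken from the tests added in this commit.

declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define void @acc_outer_product(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm,
                               <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
  ; ZA0.S accumulates the predicated outer product of %zn and %zm
  ; (codegen: fmopa za0.s, p0/m, p1/m, z0.s, z1.s)
  call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
  ret void
}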
llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2652,6 +2652,29 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;

+  class SME_OuterProduct_Intrinsic
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i64_ty,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMMatchType<0>,
+           llvm_anyvector_ty]>;
+
+  def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
+
+  def int_aarch64_sme_mopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_mops_wide : SME_OuterProduct_Intrinsic;
+
+  def int_aarch64_sme_smopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_smops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_umopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_umops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_sumopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_sumops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
+
   //
   // Counting elements
   //
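Note that the predicate types are derived from the overloaded source vector type rather than from the accumulator tile, so the widening i8-to-i32 forms take <vscale x 16 x i1> predicates. A concrete instance of the class above, matching the declarations in the new tests:

; tile (i64), row predicate, column predicate, zn, zm
declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>,
                                                  <vscale x 16 x i8>, <vscale x 16 x i8>)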
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2372,6 +2372,23 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
   return BB;
 }

+MachineBasicBlock *
+AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
+                                MachineInstr &MI, MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+  MIB.add(MI.getOperand(1)); // pn
+  MIB.add(MI.getOperand(2)); // pm
+  MIB.add(MI.getOperand(3)); // zn
+  MIB.add(MI.getOperand(4)); // zm
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                               MachineInstr &MI,
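EmitMopa expands the outer-product pseudo into the real instruction: the concrete ZA tile register is the base register (ZAS0 or ZAD0) plus the pseudo's immediate tile operand, and that register is added both as a def and as a use because MOPA/MOPS accumulate into, or subtract from, the existing tile contents. A small IR-level sketch of the visible effect (tile operand 2 with a .s tile base selects za2.s, as in the tests below; the function name is illustrative):

declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

define void @pick_tile_za2(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm,
                           <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
  ; codegen: smopa za2.s, p0/m, p1/m, z0.b, z1.b
  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}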
@@ -2459,6 +2476,54 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
   case AArch64::LDR_ZA_PSEUDO:
     return EmitFill(MI, BB);
+  case AArch64::BFMOPA_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::BFMOPS_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPAL_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPSL_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::FMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::UMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::UMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::USMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::USMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::UMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::UMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::USMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::USMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
   case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
     return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
                                   BB);
llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -563,7 +563,8 @@ public:
                               MachineInstr &MI,
                               MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
+                              MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                             MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -38,41 +38,41 @@ let Predicates = [HasSME] in {
 // Outer products
 //===----------------------------------------------------------------------===//

-defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
-defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
+defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa", int_aarch64_sme_mopa_wide>;
+defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops", int_aarch64_sme_mops_wide>;

-def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
-def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
+defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>;
 }

 let Predicates = [HasSMEF64] in {
-def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
-def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
+defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
 }

 let Predicates = [HasSME] in {
-defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
-defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
+defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa", int_aarch64_sme_mopa_wide>;
+defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops", int_aarch64_sme_mops_wide>;

-def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
-def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
-def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
-def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
-def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
-def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
-def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
-def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
+defm SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>;
 }

 let Predicates = [HasSMEI64] in {
-def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
-def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
-def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
-def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
-def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
-def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
-def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
-def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
+defm SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
 }

 let Predicates = [HasSME] in {
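Both the f16 and bf16 widening forms are wired to the same generic @llvm.aarch64.sme.mopa.wide / @llvm.aarch64.sme.mops.wide intrinsics; the overloaded element type alone decides whether fmopa or bfmopa is selected. A sketch (the function name is illustrative; the signatures come from the tests below):

declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

define void @overload_selects_instruction(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm,
                                          <vscale x 8 x half> %h, <vscale x 8 x bfloat> %b) {
  ; nxv8f16 operands select FMOPAL_MPPZZ (fmopa, widening f16 to f32)
  call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %h, <vscale x 8 x half> %h)
  ; nxv8bf16 operands select BFMOPA_MPPZZ (bfmopa, widening bf16 to f32)
  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %b)
  ret void
}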
llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -25,7 +25,8 @@ def TSV110Model : SchedMachineModel {
   let CompleteModel = 1;

   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
-                                                    PAUnsupported.F);
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }

 // Define each kind of processor resource and number available on the TSV110,
llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -28,6 +28,14 @@ def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>",
 // SME Outer Products
 //===----------------------------------------------------------------------===//

+class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
+    : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
+                          zpr_ty:$zn, zpr_ty:$zm), []>,
+      Sched<[]> {
+  // Translated to the actual instructions in AArch64ISelLowering.cpp
+  let usesCustomInserter = 1;
+}
+
 class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
                                 ZPRRegOp zpr_ty, string mnemonic>
   : I<(outs za_ty:$ZAda),
@@ -52,17 +60,31 @@ class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
   let Constraints = "$ZAda = $_ZAda";
 }

-class sme_outer_product_fp32<bit S, string mnemonic>
-    : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
-  bits<2> ZAda;
-  let Inst{1-0} = ZAda;
-  let Inst{2} = 0b0;
+multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
+    bits<2> ZAda;
+    let Inst{1-0} = ZAda;
+    let Inst{2} = 0b0;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
+
+  def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+                (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }

-class sme_outer_product_fp64<bit S, string mnemonic>
-    : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
-  bits<3> ZAda;
-  let Inst{2-0} = ZAda;
+multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
+    bits<3> ZAda;
+    let Inst{2-0} = ZAda;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
+
+  def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+                (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
 }

 class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
@@ -92,19 +114,35 @@ class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
   let Constraints = "$ZAda = $_ZAda";
 }

-class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
-    : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
-                                 mnemonic> {
-  bits<2> ZAda;
-  let Inst{1-0} = ZAda;
-  let Inst{2} = 0b0;
+multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
+                                     SDPatternOperator op> {
+  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32,
+                                        ZPR8, mnemonic> {
+    bits<2> ZAda;
+    let Inst{1-0} = ZAda;
+    let Inst{2} = 0b0;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;
+
+  def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
+                (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }

-class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
-    : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
-                                 mnemonic> {
-  bits<3> ZAda;
-  let Inst{2-0} = ZAda;
+multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
+                                     SDPatternOperator op> {
+  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64,
+                                        ZPR16, mnemonic> {
+    bits<3> ZAda;
+    let Inst{2-0} = ZAda;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
 }

 class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
@@ -131,12 +169,24 @@ class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
   let Constraints = "$ZAda = $_ZAda";
 }

-multiclass sme_bf16_outer_product<bit S, string mnemonic> {
-  def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }

-multiclass sme_f16_outer_product<bit S, string mnemonic> {
-  def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }

 //===----------------------------------------------------------------------===//
New test file (outer product + accumulate intrinsics):
@@ -0,0 +1,126 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

define void @bfmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: bfmopa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bfmopa za0.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
  ret void
}

define void @fmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: fmopa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmopa za1.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
  ret void
}

define void @smopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: smopa_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smopa za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @smopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: smopa_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smopa za0.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @umopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: umopa_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umopa za3.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @umopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: umopa_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umopa za1.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @fmopa_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: fmopa_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmopa za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
  ret void
}

define void @fmopa_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm) #1 {
; CHECK-LABEL: fmopa_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmopa za2.d, p0/m, p1/m, z0.d, z1.d
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mopa.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
  ret void
}

define void @sumopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: sumopa_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sumopa za1.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @sumopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: sumopa_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sumopa za3.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @usmopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: usmopa_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usmopa za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @usmopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: usmopa_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usmopa za7.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

attributes #0 = { "target-features"="+sme-i64" }
attributes #1 = { "target-features"="+sme-f64" }

declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.mopa.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
New test file (outer product + subtract intrinsics):
@@ -0,0 +1,126 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

define void @bfmops(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: bfmops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bfmops za0.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
  ret void
}

define void @fmops(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: fmops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmops za1.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
  ret void
}

define void @smops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: smops_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smops za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @smops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: smops_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smops za0.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @umops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: umops_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umops za3.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @umops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: umops_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umops za1.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @fmops_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: fmops_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmops za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mops.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
  ret void
}

define void @fmops_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm) #1 {
; CHECK-LABEL: fmops_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmops za2.d, p0/m, p1/m, z0.d, z1.d
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.mops.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
  ret void
}

define void @sumops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: sumops_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sumops za1.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @sumops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: sumops_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sumops za3.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

define void @usmops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: usmops_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usmops za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}

define void @usmops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
; CHECK-LABEL: usmops_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usmops za7.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

attributes #0 = { "target-features"="+sme-i64" }
attributes #1 = { "target-features"="+sme-f64" }

declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.mops.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.mops.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)