[AArch64][SVE] Add support for DestructiveBinary and DestructiveBinaryComm DestructiveInstTypes

Add support for the DestructiveBinaryComm DestructiveInstType, as well as the
lowering code to expand the new Pseudos into the final movprfx + instruction
pairs.

Differential Revision: https://reviews.llvm.org/D73711

commit a5b22b768f (parent b72f1448ce)
@@ -68,6 +68,8 @@ private:
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
@@ -344,6 +346,176 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * The _ZERO suffix also specifies that the false lanes need to be zeroed.
///
/// We first check whether the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used); otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
      DstReg != MI.getOperand(DOPIdx).getReg() ||
      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  }

  assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    if (AArch64::getSVERevInstr(Opcode) != -1)
      Opcode = AArch64::getSVERevInstr(Opcode);
    else if (AArch64::getSVEOrigInstr(Opcode) != -1)
      Opcode = AArch64::getSVEOrigInstr(Opcode);
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
             .addReg(DstReg, RegState::Define)
             .addReg(MI.getOperand(PredIdx).getReg())
             .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
             .addReg(DstReg, RegState::Define)
             .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
@@ -425,6 +597,17 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
           != AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;
@@ -162,6 +162,25 @@ public:
    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isNullValue())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
    }
    default:
      break;
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
@@ -22,13 +22,27 @@ def NormalFrm : Format<1>; // Do we need any others?

// Enum describing whether an instruction is
// destructive in its first source operand.
-class DestructiveInstTypeEnum<bits<1> val> {
-  bits<1> Value = val;
+class DestructiveInstTypeEnum<bits<4> val> {
+  bits<4> Value = val;
}
def NotDestructive : DestructiveInstTypeEnum<0>;
// Destructive in its first operand and can be MOVPRFX'd, but has no other
// special properties.
def DestructiveOther : DestructiveInstTypeEnum<1>;
def DestructiveUnary : DestructiveInstTypeEnum<2>;
def DestructiveBinaryImm : DestructiveInstTypeEnum<3>;
def DestructiveBinaryShImmUnpred : DestructiveInstTypeEnum<4>;
def DestructiveBinary : DestructiveInstTypeEnum<5>;
def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;

class FalseLanesEnum<bits<2> val> {
  bits<2> Value = val;
}
def FalseLanesNone : FalseLanesEnum<0>;
def FalseLanesZero : FalseLanesEnum<1>;
def FalseLanesUndef : FalseLanesEnum<2>;

// AArch64 Instruction Format
class AArch64Inst<Format f, string cstr> : Instruction {
@@ -46,10 +60,12 @@ class AArch64Inst<Format f, string cstr> : Instruction {
  bits<2> Form = F.Value;

  // Defaults
  FalseLanesEnum FalseLanes = FalseLanesNone;
  DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
  ElementSizeEnum ElementSize = ElementSizeNone;

-  let TSFlags{3} = DestructiveInstType.Value;
+  let TSFlags{8-7} = FalseLanes.Value;
+  let TSFlags{6-3} = DestructiveInstType.Value;
  let TSFlags{2-0} = ElementSize.Value;

  let Pattern = [];
@@ -119,11 +119,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.

@@ -6680,5 +6694,10 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::ElementSizeMask;
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
@@ -271,6 +271,8 @@ public:
                             MachineBasicBlock::iterator &It, MachineFunction &MF,
                             const outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);

@@ -295,6 +297,8 @@ protected:
  isCopyInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///

@@ -381,7 +385,8 @@ static inline bool isIndirectBranchOpcode(int Opc) {

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
-#define TSFLAG_DESTRUCTIVE_INST_TYPE(X)  ((X) << 3) // 1-bit
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X)  ((X) << 3) // 4-bit
#define TSFLAG_FALSE_LANE_TYPE(X)        ((X) << 7) // 2-bits
// }

namespace AArch64 {

@@ -396,13 +401,31 @@ enum ElementSizeType {
};

enum DestructiveInstType {
-  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
};

enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVEOrigInstr(uint16_t Opcode);
}

} // end namespace llvm
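For readers unfamiliar with how these fields compose, here is a minimal, self-contained sketch (not LLVM code) of the TSFlags layout defined above: ElementSize in bits [2:0], DestructiveInstType in bits [6:3], FalseLanes in bits [8:7]. The concrete values for DestructiveBinaryComm (0x6) and FalseLanesZero (0x1) come from the enums above; the ElementSizeS value (0x3) is assumed from the existing ElementSizeType enum, which is not shown in this hunk.

#include <cassert>
#include <cstdint>

// Masks mirroring the TSFLAG_* macros above.
constexpr uint64_t ElementSizeMask         = 0x7;        // bits [2:0]
constexpr uint64_t DestructiveInstTypeMask = 0xf << 3;   // bits [6:3]
constexpr uint64_t FalseLanesMask          = 0x3 << 7;   // bits [8:7]

// Pack the three per-instruction properties into one TSFlags word.
constexpr uint64_t encode(uint64_t ElementSize, uint64_t DType,
                          uint64_t FalseLanes) {
  return (ElementSize & 0x7) | ((DType & 0xf) << 3) | ((FalseLanes & 0x3) << 7);
}

int main() {
  // e.g. ElementSizeS (assumed 0x3), DestructiveBinaryComm (0x6),
  // FalseLanesZero (0x1).
  uint64_t TSFlags = encode(0x3, 0x6, 0x1);

  // Extracting a field is a mask (plus a shift if the raw value is wanted),
  // which is exactly how DestructiveInstTypeMask and FalseLanesMask are used
  // in expand_DestructiveOp earlier in this patch.
  assert(((TSFlags & DestructiveInstTypeMask) >> 3) == 0x6);
  assert(((TSFlags & FalseLanesMask) >> 7) == 0x1);
  assert((TSFlags & ElementSizeMask) == 0x3);
  return 0;
}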
@@ -239,19 +239,32 @@ let Predicates = [HasSVE] in {
  defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
  defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;

-  defm FADD_ZPmZ   : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
-  defm FSUB_ZPmZ   : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
-  defm FMUL_ZPmZ   : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
-  defm FSUBR_ZPmZ  : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
-  defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
-  defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
-  defm FMAX_ZPmZ   : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
-  defm FMIN_ZPmZ   : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
-  defm FABD_ZPmZ   : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
+  defm FADD_ZPmZ   : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
+  defm FSUB_ZPmZ   : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ", 1>;
+  defm FMUL_ZPmZ   : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
+  defm FSUBR_ZPmZ  : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", 0>;
+  defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
+  defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
+  defm FMAX_ZPmZ   : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
+  defm FMIN_ZPmZ   : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+  defm FABD_ZPmZ   : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
  defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
-  defm FMULX_ZPmZ  : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
-  defm FDIVR_ZPmZ  : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
-  defm FDIV_ZPmZ   : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
+  defm FMULX_ZPmZ  : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
+  defm FDIVR_ZPmZ  : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>;
+  defm FDIV_ZPmZ   : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>;

  defm FADD_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
  defm FSUB_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
  defm FMUL_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
  defm FSUBR_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
  defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
  defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
  defm FMAX_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
  defm FMIN_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
  defm FABD_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
  defm FMULX_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
  defm FDIVR_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
  defm FDIV_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;

  defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
  defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
@@ -644,4 +644,7 @@ void AArch64PassConfig::addPreEmitPass() {
  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());

  // SVE bundles move prefixes with destructive operations.
  addPass(createUnpackMachineBundles(nullptr));
}
@@ -367,8 +367,16 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
      (inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;

def SVEDup0 : ComplexPattern<i64, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;

let AddedComplexity = 1 in {
class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
                           ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
      (inst $Op1, $Op2, $Op3)>;
}

//
// Common but less generic patterns.
//
@@ -378,6 +386,55 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
      (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;

//
// Pseudo -> Instruction mappings
//
def getSVEPseudoMap : InstrMapping {
  let FilterClass = "SVEPseudo2Instr";
  let RowFields = ["PseudoName"];
  let ColFields = ["IsInstr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

class SVEPseudo2Instr<string name, bit instr> {
  string PseudoName = name;
  bit IsInstr = instr;
}

def getSVERevInstr : InstrMapping {
  let FilterClass = "SVEInstr2Rev";
  let RowFields = ["InstrName"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

def getSVEOrigInstr : InstrMapping {
  let FilterClass = "SVEInstr2Rev";
  let RowFields = ["InstrName"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

class SVEInstr2Rev<string name, string revname, bit nameIsOrig> {
  string InstrName = !if(nameIsOrig, name, revname);
  bit IsOrig = nameIsOrig;
}

//
// Pseudos for destructive operands
//
let hasNoSchedulingInfo = 1 in {
  class PredTwoOpPseudo<string name, ZPRRegOp zprty,
                        FalseLanesEnum flags = FalseLanesNone>
  : SVEPseudo2Instr<name, 0>,
    Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
    let FalseLanes = flags;
  }
}
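These InstrMappings are turned by TableGen into the lookup functions declared in AArch64InstrInfo.h and consumed by the expansion code added above (getSVEPseudoMap in expandMI, getSVERevInstr/getSVEOrigInstr in expand_DestructiveOp). The following is a rough, self-contained sketch of that lookup flow only; the opcode values are made up and std::map stands in for the TableGen-generated tables, which return -1 when an opcode has no entry.

#include <cstdint>
#include <iostream>
#include <map>

enum Opcode : uint16_t { FSUB_ZPZZ_ZERO_B, FSUB_ZPmZ_B, FSUBR_ZPmZ_B };

// Stand-in for the generated pseudo -> instruction table (getSVEPseudoMap).
static int getSVEPseudoMap(uint16_t Op) {
  static const std::map<uint16_t, int> M = {{FSUB_ZPZZ_ZERO_B, FSUB_ZPmZ_B}};
  auto It = M.find(Op);
  return It == M.end() ? -1 : It->second;
}

// Stand-in for the generated instruction -> reverse-instruction table
// (getSVERevInstr); a real table would also have the inverse (getSVEOrigInstr).
static int getSVERevInstr(uint16_t Op) {
  static const std::map<uint16_t, int> M = {{FSUB_ZPmZ_B, FSUBR_ZPmZ_B}};
  auto It = M.find(Op);
  return It == M.end() ? -1 : It->second;
}

int main() {
  // Mirrors expand_DestructiveOp: map the pseudo to its real instruction,
  // then to the reverse form if the destructive operand had to be swapped.
  bool UseRev = true;
  int Opc = getSVEPseudoMap(FSUB_ZPZZ_ZERO_B);   // -> FSUB_ZPmZ_B
  if (Opc != -1 && UseRev && getSVERevInstr(Opc) != -1)
    Opc = getSVERevInstr(Opc);                   // -> FSUBR_ZPmZ_B
  std::cout << "final opcode id: " << Opc << "\n";
  return 0;
}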

//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@@ -1427,11 +1484,17 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
  let ElementSize = zprty.ElementSize;
}

-multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
-                            SDPatternOperator op> {
-  def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
-  def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
-  def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
+                            SDPatternOperator op, DestructiveInstTypeEnum flags,
+                            string revname="", bit isOrig=0> {
+  let DestructiveInstType = flags in {
+  def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>,
+           SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isOrig>;
+  def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>,
+           SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isOrig>;
+  def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>,
+           SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isOrig>;
+  }

  def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
@@ -1449,6 +1512,16 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
  def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}

multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
  def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
  def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
  def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;

  def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _ZERO_H)>;
  def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _ZERO_S)>;
  def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _ZERO_D)>;
}

class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
    asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
@@ -68,6 +68,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
@@ -178,6 +178,7 @@
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: AArch64 Compress Jump Tables
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
@@ -0,0 +1,261 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve < %s | FileCheck %s

;
; FADD
;

define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a_z,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a_z,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMAX
;

define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmax_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a_z,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmax_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a_z,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMAXNM
;

define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmaxnm_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a_z,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmaxnm_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a_z,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMIN
;

define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmin_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a_z,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmin_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a_z,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMINNM
;

define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fminnm_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a_z,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fminnm_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a_z,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMUL
;

define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a_z,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a_z,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FSUB
;

define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a_z,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x double> %a_z,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FSUBR
;

define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsubr_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x float> %a_z,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsubr_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
                                                                    <vscale x 2 x double> %a_z,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)