[AArch64][SVE] Add support for DestructiveBinary and DestructiveBinaryComm DestructiveInstTypes

Add support for DestructiveBinaryComm DestructiveInstType, as well as the lowering code to expand the new Pseudos into the final movprfx+instruction pairs.

Differential Revision: https://reviews.llvm.org/D73711
This commit is contained in:
Cameron McInally 2020-02-21 14:41:35 -06:00
parent b72f1448ce
commit a5b22b768f
11 changed files with 638 additions and 26 deletions

View File

@ -68,6 +68,8 @@ private:
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@ -344,6 +346,176 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
return true;
}
/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instructions register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
/// constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
if (DType == AArch64::DestructiveBinary)
assert(DstReg != MI.getOperand(3).getReg());
bool UseRev = false;
unsigned PredIdx, DOPIdx, SrcIdx;
switch (DType) {
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
if (DstReg == MI.getOperand(3).getReg()) {
// FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
UseRev = true;
break;
}
LLVM_FALLTHROUGH;
case AArch64::DestructiveBinary:
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
break;
default:
llvm_unreachable("Unsupported Destructive Operand type");
}
#ifndef NDEBUG
// MOVPRFX can only be used if the destination operand
// is the destructive operand, not as any other operand,
// so the Destructive Operand must be unique.
bool DOPRegIsUnique = false;
switch (DType) {
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
DOPRegIsUnique =
DstReg != MI.getOperand(DOPIdx).getReg() ||
MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
break;
}
assert (DOPRegIsUnique && "The destructive operand should be unique");
#endif
// Resolve the reverse opcode
if (UseRev) {
if (AArch64::getSVERevInstr(Opcode) != -1)
Opcode = AArch64::getSVERevInstr(Opcode);
else if (AArch64::getSVEOrigInstr(Opcode) != -1)
Opcode = AArch64::getSVEOrigInstr(Opcode);
}
// Get the right MOVPRFX
uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
unsigned MovPrfx, MovPrfxZero;
switch (ElementSize) {
case AArch64::ElementSizeNone:
case AArch64::ElementSizeB:
MovPrfx = AArch64::MOVPRFX_ZZ;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
break;
case AArch64::ElementSizeH:
MovPrfx = AArch64::MOVPRFX_ZZ;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
break;
case AArch64::ElementSizeS:
MovPrfx = AArch64::MOVPRFX_ZZ;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
break;
case AArch64::ElementSizeD:
MovPrfx = AArch64::MOVPRFX_ZZ;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
break;
default:
llvm_unreachable("Unsupported ElementSize");
}
//
// Create the destructive operation (if required)
//
MachineInstrBuilder PRFX, DOP;
if (FalseZero) {
assert(ElementSize != AArch64::ElementSizeNone &&
"This instruction is unpredicated");
// Merge source operand into destination register
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
.addReg(DstReg, RegState::Define)
.addReg(MI.getOperand(PredIdx).getReg())
.addReg(MI.getOperand(DOPIdx).getReg());
// After the movprfx, the destructive operand is same as Dst
DOPIdx = 0;
} else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
.addReg(DstReg, RegState::Define)
.addReg(MI.getOperand(DOPIdx).getReg());
DOPIdx = 0;
}
//
// Create the destructive operation
//
DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
switch (DType) {
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
DOP.add(MI.getOperand(PredIdx))
.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
.add(MI.getOperand(SrcIdx));
break;
}
if (PRFX) {
finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
transferImpOps(MI, PRFX, DOP);
} else
transferImpOps(MI, DOP, DOP);
MI.eraseFromParent();
return true;
}
bool AArch64ExpandPseudo::expandSetTagLoop(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
@ -425,6 +597,17 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
// Check if we can expand the destructive op
int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
if (OrigInstr != -1) {
auto &Orig = TII->get(OrigInstr);
if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
!= AArch64::NotDestructive) {
return expand_DestructiveOp(MI, MBB, MBBI);
}
}
switch (Opcode) {
default:
break;

View File

@ -162,6 +162,25 @@ public:
return false;
}
bool SelectDupZero(SDValue N) {
switch(N->getOpcode()) {
case AArch64ISD::DUP:
case ISD::SPLAT_VECTOR: {
auto Opnd0 = N->getOperand(0);
if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
if (CN->isNullValue())
return true;
if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
if (CN->isZero())
return true;
}
default:
break;
}
return false;
}
template<MVT::SimpleValueType VT>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift);

View File

@ -22,13 +22,27 @@ def NormalFrm : Format<1>; // Do we need any others?
// Enum describing whether an instruction is
// destructive in its first source operand.
class DestructiveInstTypeEnum<bits<1> val> {
bits<1> Value = val;
class DestructiveInstTypeEnum<bits<4> val> {
bits<4> Value = val;
}
def NotDestructive : DestructiveInstTypeEnum<0>;
def NotDestructive : DestructiveInstTypeEnum<0>;
// Destructive in its first operand and can be MOVPRFX'd, but has no other
// special properties.
def DestructiveOther : DestructiveInstTypeEnum<1>;
def DestructiveOther : DestructiveInstTypeEnum<1>;
def DestructiveUnary : DestructiveInstTypeEnum<2>;
def DestructiveBinaryImm : DestructiveInstTypeEnum<3>;
def DestructiveBinaryShImmUnpred : DestructiveInstTypeEnum<4>;
def DestructiveBinary : DestructiveInstTypeEnum<5>;
def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
class FalseLanesEnum<bits<2> val> {
bits<2> Value = val;
}
def FalseLanesNone : FalseLanesEnum<0>;
def FalseLanesZero : FalseLanesEnum<1>;
def FalseLanesUndef : FalseLanesEnum<2>;
// AArch64 Instruction Format
class AArch64Inst<Format f, string cstr> : Instruction {
@ -46,10 +60,12 @@ class AArch64Inst<Format f, string cstr> : Instruction {
bits<2> Form = F.Value;
// Defaults
FalseLanesEnum FalseLanes = FalseLanesNone;
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeNone;
let TSFlags{3} = DestructiveInstType.Value;
let TSFlags{8-7} = FalseLanes.Value;
let TSFlags{6-3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
let Pattern = [];

View File

@ -119,11 +119,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
case TargetOpcode::BUNDLE:
NumBytes = getInstBundleLength(MI);
break;
}
return NumBytes;
}
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
@ -6680,5 +6694,10 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"

View File

@ -271,6 +271,8 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@ -295,6 +297,8 @@ protected:
isCopyInstrImpl(const MachineInstr &MI) const override;
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
/// Sets the offsets on outlined instructions in \p MBB which use SP
/// so that they will be valid post-outlining.
///
@ -381,7 +385,8 @@ static inline bool isIndirectBranchOpcode(int Opc) {
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
// }
namespace AArch64 {
@ -396,13 +401,31 @@ enum ElementSizeType {
};
enum DestructiveInstType {
DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
};
enum FalseLaneType {
FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVEOrigInstr(uint16_t Opcode);
}
} // end namespace llvm

View File

@ -239,19 +239,32 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ", 1>;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", 0>;
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>;
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>;
defm FADD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
defm FMAX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
defm FMIN_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
defm FABD_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;

View File

@ -644,4 +644,7 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
// SVE bundles move prefixes with destructive operations.
addPass(createUnpackMachineBundles(nullptr));
}

View File

@ -367,8 +367,16 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
def SVEDup0 : ComplexPattern<i64, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
let AddedComplexity = 1 in {
class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
(inst $Op1, $Op2, $Op3)>;
}
//
// Common but less generic patterns.
//
@ -378,6 +386,55 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
//
// Pseudo -> Instruction mappings
//
def getSVEPseudoMap : InstrMapping {
let FilterClass = "SVEPseudo2Instr";
let RowFields = ["PseudoName"];
let ColFields = ["IsInstr"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
}
class SVEPseudo2Instr<string name, bit instr> {
string PseudoName = name;
bit IsInstr = instr;
}
def getSVERevInstr : InstrMapping {
let FilterClass = "SVEInstr2Rev";
let RowFields = ["InstrName"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
def getSVEOrigInstr : InstrMapping {
let FilterClass = "SVEInstr2Rev";
let RowFields = ["InstrName"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
let ValueCols = [["1"]];
}
class SVEInstr2Rev<string name, string revname, bit nameIsOrig> {
string InstrName = !if(nameIsOrig, name, revname);
bit IsOrig = nameIsOrig;
}
//
// Pseudos for destructive operands
//
let hasNoSchedulingInfo = 1 in {
class PredTwoOpPseudo<string name, ZPRRegOp zprty,
FalseLanesEnum flags = FalseLanesNone>
: SVEPseudo2Instr<name, 0>,
Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
let FalseLanes = flags;
}
}
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@ -1427,11 +1484,17 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
SDPatternOperator op> {
def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
SDPatternOperator op, DestructiveInstTypeEnum flags,
string revname="", bit isOrig=0> {
let DestructiveInstType = flags in {
def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>,
SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isOrig>;
def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>,
SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isOrig>;
def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>,
SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isOrig>;
}
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
@ -1449,6 +1512,16 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _ZERO_H)>;
def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _ZERO_S)>;
def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _ZERO_D)>;
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",

View File

@ -68,6 +68,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis

View File

@ -178,6 +178,7 @@
; CHECK-NEXT: AArch64 Branch Targets
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: AArch64 Compress Jump Tables
; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis

View File

@ -0,0 +1,261 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=sve < %s | FileCheck %s
;
; FADD
;
define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FMAX
;
define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmax_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmax_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FMAXNM
;
define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmaxnm_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmaxnm_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FMIN
;
define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmin_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmin_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FMINNM
;
define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fminnm_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fminnm_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FMUL
;
define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FSUB
;
define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
;
; FSUBR
;
define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsubr_s:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
<vscale x 4 x float> %a_z,
<vscale x 4 x float> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsubr_d:
; CHECK: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
<vscale x 2 x double> %a_z,
<vscale x 2 x double> %b)
ret <vscale x 2 x double> %out
}
declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)