[AArch64][SVE] Asm: Add MOVPRFX instructions.

This patch adds predicated and unpredicated MOVPRFX instructions, which
can be prepended to SVE instructions that are destructive on their first
source operand, to make them a constructive operation, e.g.

  add z1.s, p0/m, z1.s, z2.s        <=> z1 = z1 + z2

can be made constructive:

  movprfx z0, z1
  add z0.s, p0/m, z0.s, z2.s        <=> z0 = z1 + z2

The predicated MOVPRFX instruction can additionally be used to zero
inactive elements, e.g.

  movprfx z0.s, p0/z, z1.s
  add z0.s, p0/m, z0.s, z2.s

Not all instructions can be prefixed with the MOVPRFX instruction
which is why this patch also adds a mechanism to validate prefixed
instructions. The exact rules for when a MOVPRFX applies are detailed in
the SVE supplement of the Architectural Reference Manual.

This is patch [1/2] in a series to add MOVPRFX instructions:
- Patch [1/2]: https://reviews.llvm.org/D49592
- Patch [2/2]: https://reviews.llvm.org/D49593

Reviewers: rengolin, SjoerdMeijer, samparker, fhahn, javed.absar

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D49592

llvm-svn: 338258
This commit is contained in:
Sander de Smalen 2018-07-30 15:42:46 +00:00
parent 403826ee0f
commit 9b33309c87
6 changed files with 273 additions and 30 deletions

View File

@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
let Size = 4;
}
// Enum describing whether an instruction is
// destructive in its first source operand.
//
// The single-bit Value of the selected record is copied into TSFlags{3} by
// class I (via its DestructiveInstType field), which defaults to
// NotDestructive.
class DestructiveInstTypeEnum<bits<1> val> {
// Raw encoding of this enumerator as stored in TSFlags.
bits<1> Value = val;
}
// The instruction does not overwrite its first source operand.
def NotDestructive : DestructiveInstTypeEnum<0>;
// The instruction overwrites its first source operand.
def Destructive : DestructiveInstTypeEnum<1>;
// Normal instructions
class I<dag oops, dag iops, string asm, string operands, string cstr,
list<dag> pattern>
@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr,
dag OutOperandList = oops;
dag InOperandList = iops;
let AsmString = !strconcat(asm, operands);
// Destructive operations (SVE)
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeB;
let TSFlags{3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
}
class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;

View File

@ -340,6 +340,32 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return Opc == AArch64::BR;
}
// Layout of the target-specific instruction flags (TSFlags) used here.
// The helper macros below place a field value at its position inside the
// flags word: the element size occupies the low three bits and the
// destructive-instruction type occupies the bit above them (shift by 3).
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
// }
namespace AArch64 {
// Element-size field of TSFlags. AND a TSFlags value with ElementSizeMask
// and compare the result against one of the other enumerators.
// NOTE(review): the TableGen ElementSizeEnum also defines ElementSizeQ (5);
// there is no matching enumerator here -- confirm that is intended.
enum ElementSizeType {
ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};
// Destructive-instruction field of TSFlags. AND a TSFlags value with
// DestructiveInstTypeMask and compare against NotDestructive/Destructive.
enum DestructiveInstType {
DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
};
// The placement macros are only needed to define the enumerators above.
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
}
} // end namespace llvm
#endif

View File

@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
}
// Enum describing the element size for destructive
// operations.
//
// Each SVE register operand (SVERegOp) carries one of these records, and
// class I copies the 3-bit Value of its own ElementSize field into
// TSFlags{2-0}.
class ElementSizeEnum<bits<3> val> {
// Raw encoding of this enumerator as stored in TSFlags.
bits<3> Value = val;
}
def ElementSizeNone : ElementSizeEnum<0>;
def ElementSizeB : ElementSizeEnum<1>;
def ElementSizeH : ElementSizeEnum<2>;
def ElementSizeS : ElementSizeEnum<3>;
def ElementSizeD : ElementSizeEnum<4>;
def ElementSizeQ : ElementSizeEnum<5>; // Unused
class SVERegOp <string Suffix, AsmOperandClass C,
ElementSizeEnum Size,
RegisterClass RC> : RegisterOperand<RC> {
ElementSizeEnum ElementSize;
let ElementSize = Size;
let PrintMethod = !if(!eq(Suffix, ""),
"printSVERegOp<>",
"printSVERegOp<'" # Suffix # "'>");
let ParserMatchClass = C;
}
class PPRRegOp <string Suffix, AsmOperandClass C,
RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
class ZPRRegOp <string Suffix, AsmOperandClass C,
RegisterClass RC> : SVERegOp<Suffix, C, RC> {}
class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
//******************************************************************************
@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>;
def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>;
def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>;
def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>;
def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>;
def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>;
def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>;
def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>;
def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>;
def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>;
def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>;
def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>;
def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">;
def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">;
def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">;
def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>;
def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>;
def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>;
def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>;
def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>;
def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>;
def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">;
def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">;
def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">;
def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>;
def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>;
def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>;
def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>;
def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>;
def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>;
class FPRasZPR<int Width> : AsmOperandClass{
let Name = "FPR" # Width # "asZPR";

View File

@ -220,6 +220,9 @@ let Predicates = [HasSVE] in {
def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;

View File

@ -11,6 +11,7 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@ -79,6 +80,67 @@ private:
// Map of register aliases created via the .req directive.
StringMap<std::pair<RegKind, unsigned>> RegisterReqs;
/// Records the MOVPRFX-related state extracted from one instruction so that
/// the instruction following it can be validated against it.
///
/// An object is "active" only when it was created from one of the MOVPRFX
/// opcodes; for every other opcode CreateFromInst() returns an inactive
/// object. All members are given a defined value on construction because
/// validateInstruction() copies NextPrefix for every instruction, including
/// the (common) case where no MOVPRFX was seen.
class PrefixInfo {
public:
  /// Build the prefix description for \p Inst.
  ///
  /// \param Inst    the instruction that has just been matched.
  /// \param TSFlags the TSFlags of the instruction's descriptor; the element
  ///                size of a predicated MOVPRFX is read from it.
  /// \returns an active PrefixInfo when \p Inst is a MOVPRFX, otherwise an
  ///          inactive (default-constructed) one.
  static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) {
    PrefixInfo Prefix;
    switch (Inst.getOpcode()) {
    case AArch64::MOVPRFX_ZZ:
      // Unpredicated form: only the destination register is relevant.
      Prefix.Active = true;
      Prefix.Dst = Inst.getOperand(0).getReg();
      break;
    case AArch64::MOVPRFX_ZPmZ_B:
    case AArch64::MOVPRFX_ZPmZ_H:
    case AArch64::MOVPRFX_ZPmZ_S:
    case AArch64::MOVPRFX_ZPmZ_D:
      Prefix.Active = true;
      Prefix.Predicated = true;
      Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
      assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
             "No destructive element size set for movprfx");
      Prefix.Dst = Inst.getOperand(0).getReg();
      // Merging form: operands are (Zd, tied Zd, Pg, Zn), so the governing
      // predicate is operand 2.
      Prefix.Pg = Inst.getOperand(2).getReg();
      break;
    case AArch64::MOVPRFX_ZPzZ_B:
    case AArch64::MOVPRFX_ZPzZ_H:
    case AArch64::MOVPRFX_ZPzZ_S:
    case AArch64::MOVPRFX_ZPzZ_D:
      Prefix.Active = true;
      Prefix.Predicated = true;
      Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask;
      assert(Prefix.ElementSize != AArch64::ElementSizeNone &&
             "No destructive element size set for movprfx");
      Prefix.Dst = Inst.getOperand(0).getReg();
      // Zeroing form: operands are (Zd, Pg, Zn), so the governing predicate
      // is operand 1.
      Prefix.Pg = Inst.getOperand(1).getReg();
      break;
    default:
      // Not a MOVPRFX: leave the object inactive.
      break;
    }

    return Prefix;
  }

  /// Creates an inactive prefix. Every member is initialized so that copying
  /// the object never reads an indeterminate value.
  PrefixInfo()
      : Active(false), Predicated(false),
        ElementSize(AArch64::ElementSizeNone), Dst(0), Pg(0) {}

  /// True when this object was created from a MOVPRFX instruction.
  bool isActive() const { return Active; }

  /// True when the MOVPRFX was one of the predicated forms.
  bool isPredicated() const { return Predicated; }

  /// Element size of the predicated MOVPRFX, as a masked TSFlags value.
  /// Only meaningful (and only permitted) for predicated prefixes.
  unsigned getElementSize() const {
    assert(Predicated);
    return ElementSize;
  }

  /// Destination register written by the MOVPRFX.
  unsigned getDstReg() const { return Dst; }

  /// Governing predicate register of the predicated MOVPRFX.
  /// Only meaningful (and only permitted) for predicated prefixes.
  unsigned getPgReg() const {
    assert(Predicated);
    return Pg;
  }

private:
  bool Active;          // Created from a MOVPRFX opcode.
  bool Predicated;      // Created from a predicated MOVPRFX opcode.
  unsigned ElementSize; // TSFlags & ElementSizeMask of the MOVPRFX.
  unsigned Dst;         // Destination register of the MOVPRFX.
  unsigned Pg;          // Governing predicate register, if predicated.
} NextPrefix;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AArch64TargetStreamer &>(TS);
@ -113,7 +175,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
/// Returns true when \p Reg, rebased from the first register of any of the
/// B/H/S/D/Q/Z banks onto Z0, equals \p ZReg.
///
/// \p ZReg must be one of Z0..Z31 (asserted).
static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) {
  assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31));

  // First register of each bank that is compared against the Z register.
  const unsigned BankBases[] = {AArch64::B0, AArch64::H0, AArch64::S0,
                                AArch64::D0, AArch64::Q0, AArch64::Z0};

  for (unsigned Base : BankBases) {
    // Translate Reg's offset within this bank to the Z register at the same
    // offset and compare.
    const unsigned Candidate = (Reg - Base) + AArch64::Z0;
    if (ZReg == Candidate)
      return true;
  }
  return false;
}
// FIXME: This entire function is a giant hack to provide us with decent
// operand range validation/diagnostics until TableGen/MC can be extended
// to support autogeneration of this kind of validation.
bool AArch64AsmParser::validateInstruction(MCInst &Inst,
SmallVectorImpl<SMLoc> &Loc) {
bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
// A prefix only applies to the instruction following it. Here we extract
// prefix information for the next instruction before validating the current
// one so that in the case of failure we don't erroneously continue using the
// current prefix.
PrefixInfo Prefix = NextPrefix;
NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags);
// Before validating the instruction in isolation we run through the rules
// applicable when it follows a prefix instruction.
// NOTE: brk & hlt can be prefixed but require no additional validation.
if (Prefix.isActive() &&
(Inst.getOpcode() != AArch64::BRK) &&
(Inst.getOpcode() != AArch64::HLT)) {
// Prefixed instructions must have a destructive operand.
if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) ==
AArch64::NotDestructive)
return Error(IDLoc, "instruction is unpredictable when following a"
" movprfx, suggest replacing movprfx with mov");
// Destination operands must match.
if (Inst.getOperand(0).getReg() != Prefix.getDstReg())
return Error(Loc[0], "instruction is unpredictable when following a"
" movprfx writing to a different destination");
// Destination operand must not be used in any other location.
for (unsigned i = 1; i < Inst.getNumOperands(); ++i) {
if (Inst.getOperand(i).isReg() &&
(MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) &&
isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg()))
return Error(Loc[0], "instruction is unpredictable when following a"
" movprfx and destination also used as non-destructive"
" source");
}
auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID];
if (Prefix.isPredicated()) {
int PgIdx = -1;
// Find the instruction's general predicate.
for (unsigned i = 1; i < Inst.getNumOperands(); ++i)
if (Inst.getOperand(i).isReg() &&
PPRRegClass.contains(Inst.getOperand(i).getReg())) {
PgIdx = i;
break;
}
// Instruction must be predicated if the movprfx is predicated.
if (PgIdx == -1 ||
(MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone)
return Error(IDLoc, "instruction is unpredictable when following a"
" predicated movprfx, suggest using unpredicated movprfx");
// Instruction must use same general predicate as the movprfx.
if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg())
return Error(IDLoc, "instruction is unpredictable when following a"
" predicated movprfx using a different general predicate");
// Instruction element type must match the movprfx.
if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize())
return Error(IDLoc, "instruction is unpredictable when following a"
" predicated movprfx with a different element size");
}
}
// Check for indexed addressing modes w/ the base register being the
// same as a destination/source register or pair load where
// the Rt == Rt2. All of those are undefined behaviour.
@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
NumOperands = Operands.size();
for (unsigned i = 1; i < NumOperands; ++i)
OperandLocs.push_back(Operands[i]->getStartLoc());
if (validateInstruction(Inst, OperandLocs))
if (validateInstruction(Inst, IDLoc, OperandLocs))
return true;
Inst.setLoc(IDLoc);

View File

@ -4250,6 +4250,48 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> {
def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
}
// Encoding and assembly format shared by the predicated MOVPRFX variants.
//   sz8_32    - value placed in Inst{23-22}.
//   opc       - value placed in Inst{18-16}.
//   asm       - mnemonic.
//   zprty     - vector register operand used for the destination $Zd; its
//               ElementSize is also recorded on the instruction.
//   pg_suffix - text appended to $Pg in the assembly string ("/m" or "/z").
//   iops      - input operand list supplied by the instantiating multiclass.
class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
                           ZPRRegOp zprty, string pg_suffix, dag iops>
: I<(outs zprty:$Zd), iops,
  asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
  "",
  []>, Sched<[]> {
  bits<3> Pg;
  bits<5> Zd;
  bits<5> Zn;
  let Inst{31-24} = 0b00000100;
  let Inst{23-22} = sz8_32;
  let Inst{21-19} = 0b010;
  let Inst{18-16} = opc;
  let Inst{15-13} = 0b001;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  // Record the operand's element size on the instruction so it reaches
  // TSFlags{2-0}. Without this every variant would inherit the ElementSizeB
  // default from class I, and the assembler's prefix validation (which reads
  // the element size of a predicated MOVPRFX from TSFlags) would see the
  // wrong size for the _H/_S/_D forms.
  let ElementSize = zprty.ElementSize;
}
// Merging ("/m") predicated MOVPRFX, instantiated once per element size
// (_B, _H, _S, _D). The sz8_32 field is 0b00..0b11 for B..D and opc is
// forwarded unchanged.
multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
// The destination is tied to the extra $_Zd input, so the input operand
// list is ($_Zd, $Pg, $Zn).
// NOTE(review): presumably this models inactive elements keeping the old
// destination value -- confirm against the architecture manual.
let Constraints = "$Zd = $_Zd" in {
def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
(ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
(ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
(ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
(ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
}
}
// Zeroing ("/z") predicated MOVPRFX, instantiated once per element size
// (_B, _H, _S, _D). The sz8_32 field is 0b00..0b11 for B..D and opc is
// forwarded unchanged. Unlike the merging form there is no tied input, so
// the input operand list is just ($Pg, $Zn).
multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
(ins PPR3bAny:$Pg, ZPR8:$Zn)>;
def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
(ins PPR3bAny:$Pg, ZPR16:$Zn)>;
def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
(ins PPR3bAny:$Pg, ZPR32:$Zn)>;
def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
(ins PPR3bAny:$Pg, ZPR64:$Zn)>;
}
//===----------------------------------------------------------------------===//
// SVE Propagate Break Group