forked from OSchip/llvm-project
[AMDGPU] gfx11 VOPD instructions MC support
VOPD is a new encoding for dual-issue instructions, usable only in wave32 mode. This patch adds MC layer support only.

A VOPD instruction pairs an X component (13 possible opcodes) with a Y component (the same 13 opcodes plus 3 more). Most of the complexity in defining and parsing a VOPD instruction comes from the varying total number of operands and from the deferred parsing of certain operands, both of which depend on the constituent X and Y opcodes.

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D128218
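For reference, the new assembler syntax joins the two components with a '::' token. The two lines below are taken from the new MC test added by this patch; the second shows a Y component with a mandatory 32-bit literal (fmamk), which is what triggers the deferred-operand parsing described above.

v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1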
parent 78a31bb969
commit 07b7fada73
@@ -87,6 +87,17 @@ class PredConcat<list<Predicate> lst, Predicate pred> {
!listconcat([pred], !filter(item, lst, !ne(item, pred)));
}

// Add a Register to the list if does not already exist
class RegAppend<list<Register> lst, Register reg> {
list<Register> ret =
!listconcat([reg], !filter(item, lst, !ne(item, reg)));
}
// Get the union of two Register lists
class RegListUnion<list<Register> lstA, list<Register> lstB> {
list<Register> ret =
!foldl(lstA, lstB, temp, item, RegAppend<temp, item>.ret);
}

class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
Predicate AssemblerPredicate = TruePredicate;

@@ -1680,7 +1680,7 @@ private:
bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
AsmToken peekToken();
AsmToken peekToken(bool ShouldSkipSpace = true);
AsmToken getToken() const;
SMLoc getLoc() const;
void lex();

@@ -1738,6 +1738,7 @@ public:
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);

@@ -1804,6 +1805,7 @@ public:
AMDGPUOperand::Ptr defaultWaitVDST() const;
AMDGPUOperand::Ptr defaultWaitEXP() const;
OperandMatchResultTy parseVOPD(OperandVector &Operands);
};

struct OptionalOperand {

@@ -2909,7 +2911,8 @@ OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// TODO: add syntactic sugar for 1/(2*PI)

assert(!isRegister());
if (isRegister())
return MatchOperand_NoMatch;
assert(!isModifier());

const auto& Tok = getToken();

@@ -5671,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
OperandMode Mode) {
OperandMatchResultTy ResTy = parseVOPD(Operands);
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
isToken(AsmToken::EndOfStatement))
return ResTy;

// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
ResTy = MatchOperandParserImpl(Operands, Mnemonic);

// If we successfully parsed the operand or if there as an error parsing,
// we are done.

@@ -7108,9 +7116,10 @@ AMDGPUAsmParser::getToken() const {
return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
return isToken(AsmToken::EndOfStatement)
? getToken()
: getLexer().peekTok(ShouldSkipSpace);
}

void

@@ -8316,6 +8325,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
if (!hasVOPD(getSTI()))
return MatchOperand_NoMatch;

if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
SMLoc S = getLoc();
lex();
lex();
Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
const MCExpr *Expr;
if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
return MatchOperand_Success;
}
Error(S, "invalid VOPD :: usage");
return MatchOperand_ParseFail;
}
return MatchOperand_NoMatch;
}

// Create VOPD MCInst operands using parsed assembler operands.
// Parsed VOPD operands are ordered as follows:
// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// If both OpX and OpY have an imm, the first imm has a different name:
// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// MCInst operands have the following order:
// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
auto addOp = [&](uint16_t i) { // NOLINT:function pointer
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
return;
}
if (Op.isImm()) {
Op.addImmOperands(Inst, 1);
return;
}
// Handle tokens like 'offen' which are sometimes hard-coded into the
// asm string. There are no MCInst operands for these.
if (Op.isToken()) {
return;
}
llvm_unreachable("Unhandled operand type in cvtVOPD");
};

// Indices into MCInst.Operands
const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ...

unsigned Opc = Inst.getOpcode();
bool HasVsrc1X =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
bool HasImmX =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
(HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
FmamkOpXImmMCIndex ||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
FmaakOpXImmMCIndex));

bool HasVsrc1Y =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
bool HasImmY =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
MinOpYImmMCIndex + HasVsrc1X;

// Indices of parsed operands relative to dst
const auto DstIdx = 0;
const auto Src0Idx = 1;
const auto Vsrc1OrImmIdx = 2;

const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)

// Offsets into parsed operands
const auto OpXFirstOperandOffset = 1;
const auto OpYFirstOperandOffset =
OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;

// Order of addOp calls determines MC operand order
addOp(OpXFirstOperandOffset + DstIdx); // vdstX
addOp(OpYFirstOperandOffset + DstIdx); // vdstY

addOp(OpXFirstOperandOffset + Src0Idx); // src0X
if (HasImmX) {
// immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
} else {
if (HasVsrc1X) // all except v_mov
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
}

addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
if (HasImmY) {
// immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
} else {
if (HasVsrc1Y) // all except v_mov
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
}
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

@@ -288,6 +288,12 @@ decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)

@@ -448,6 +454,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOPCDPPInst(MI);
break;
}
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
if (Res)
break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);

@@ -971,6 +980,8 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
insertNamedMCOperand(MI, MCOperand::createImm(Literal),
AMDGPU::OpName::immDeferred);
assert(DescNumOps == MI.getNumOperands());
for (unsigned I = 0; I < DescNumOps; ++I) {
auto &Op = MI.getOperand(I);

@@ -1213,6 +1224,9 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {
assert(
AMDGPU::hasVOPD(STI) &&
"Should only decode multiple kimm with VOPD, check VSrc operand types");
if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}

@@ -1505,6 +1519,20 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
llvm_unreachable("unknown dst register");
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
unsigned Val) const {
int VDstXInd =
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
assert(VDstXInd != -1);
assert(Inst.getOperand(VDstXInd).isReg());
unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
Val |= ~XDstReg & 1;
auto Width = llvm::AMDGPUDisassembler::OPW32;
return createRegOperand(getVgprClassId(Width), Val);
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;

@@ -233,6 +233,7 @@ public:
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;

@@ -2074,6 +2074,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}

class getAsmVOPDPart <int NumSrcArgs, string XorY> {
string dst = "$vdst" # XorY;
string src0 = ", $src0" # XorY;
string src1 = ", $vsrc1" # XorY;
string ret = dst #
!if(!ge(NumSrcArgs, 1), src0, "") #
!if(!ge(NumSrcArgs, 2), src1, "");
}

// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,

@@ -2513,6 +2522,14 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
// It is a slight misnomer to use the deferred f32 operand type for non-float
// operands, but this operand type will only be used if the other dual
// component is FMAAK or FMAMK
field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);

field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;

@@ -2536,6 +2553,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
field string TieRegDPP = "$old";
}

@@ -1783,6 +1783,10 @@ bool hasMAIInsts(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
}

bool hasVOPD(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
}

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
int32_t ArgNumVGPR) {
if (has90AInsts && ArgNumAGPR)

@@ -823,6 +823,7 @@ bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);

/// Is Reg - scalar register

@@ -110,13 +110,17 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
}

multiclass VOP1Inst <string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
SDPatternOperator node = null_frag, int VOPDOp = -1> {
// We only want to set this on the basic, non-SDWA or DPP forms.
defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
!eq(opName, "v_mov_b64"));

let isMoveImm = should_mov_imm in {
def _e32 : VOP1_Pseudo <opName, P>;
if !eq(VOPDOp, -1) then
def _e32 : VOP1_Pseudo <opName, P>;
else
// Only for V_MOV_B32
def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
def _e64 : VOP3InstBase <opName, P, node>;
}

@@ -182,8 +186,15 @@ let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}

def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDX = (ins Src0RC32:$src0X);
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;

@@ -140,7 +140,13 @@ multiclass VOP2Inst_e32<string opName,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
} // End renamedInGFX9 = GFX9Renamed
}

multiclass
VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
string VOPDName, SDPatternOperator node = null_frag,
string revOp = opName, bit GFX9Renamed = 0> {
defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOPD_Component<VOPDOp, VOPDName>;
}
multiclass VOP2Inst_e64<string opName,
VOPProfile P,
SDPatternOperator node = null_frag,

@@ -180,6 +186,22 @@ multiclass VOP2Inst<string opName,
}
}

multiclass VOP2Inst_VOPD<string opName,
VOPProfile P,
bits<5> VOPDOp,
string VOPDName,
SDPatternOperator node = null_frag,
string revOp = opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}

multiclass VOP2bInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,

@@ -230,16 +252,19 @@ multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
}
}

multiclass VOP2eInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName,
bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
multiclass
VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
SDPatternOperator node, string revOp, bit useSGPRInput> {

let SchedRW = [Write32Bit] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
if !eq(VOPDOp, -1) then
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
else
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
VOPD_Component<VOPDOp, VOPDName>;

foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {

@@ -262,6 +287,16 @@ multiclass VOP2eInst <string opName,
}
}

multiclass
VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
: VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;

multiclass
VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
SDPatternOperator node = null_frag, string revOp = opName,
bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
: VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,

@@ -283,12 +318,24 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
}
}

class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
string AsmVOPDXDeferred = ?;
}

class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
(ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
// Note that both src0X and imm are deferred
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

field string Asm32 = "$vdst, $src0, $src1, $imm";
field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
field bit HasExt = 0;
let IsSingle = 1;
}

@@ -296,10 +343,17 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

field string Asm32 = "$vdst, $src0, $imm, $src1";
field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
field bit HasExt = 0;
let IsSingle = 1;
}

@@ -537,31 +591,31 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

@@ -593,7 +647,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f

let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

@@ -783,7 +837,7 @@ let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;

} // End SubtargetPredicate = HasDLInsts

|
@ -811,7 +865,7 @@ let Constraints = "$vdst = $src2",
|
|||
isCommutable = 1,
|
||||
IsDOT = 1 in {
|
||||
let SubtargetPredicate = HasDot5Insts in
|
||||
defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
|
||||
defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
|
||||
let SubtargetPredicate = HasDot6Insts in
|
||||
defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
|
||||
|
||||
|
@@ -849,10 +903,10 @@ let AddedComplexity = 30 in {
} // End AddedComplexity = 30

let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;

let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
}

let SubtargetPredicate = isGFX10Plus in {

@@ -0,0 +1,159 @@
//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Encodings
//===----------------------------------------------------------------------===//

class VOPDe<bits<4> opX, bits<5> opY> : Enc64 {
bits<9> src0X;
bits<8> vsrc1X;
bits<8> vdstX;
bits<9> src0Y;
bits<8> vsrc1Y;
bits<8> vdstY;

let Inst{8-0} = src0X;
let Inst{16-9} = vsrc1X;
let Inst{21-17} = opY;
let Inst{25-22} = opX;
let Inst{31-26} = 0x32; // encoding
let Inst{40-32} = src0Y;
let Inst{48-41} = vsrc1Y;
let Inst{55-49} = vdstY{7-1};
let Inst{63-56} = vdstX;
}

class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
bits<9> src0X;
bits<8> vsrc1X;
bits<8> vdstX;
bits<9> src0Y;
bits<8> vsrc1Y;
bits<8> vdstY;
bits<32> imm;

let Inst{8-0} = src0X;
let Inst{16-9} = vsrc1X;
let Inst{21-17} = opY;
let Inst{25-22} = opX;
let Inst{31-26} = 0x32; // encoding
let Inst{40-32} = src0Y;
let Inst{48-41} = vsrc1Y;
let Inst{55-49} = vdstY{7-1};
let Inst{63-56} = vdstX;
let Inst{95-64} = imm;
}

//===----------------------------------------------------------------------===//
// VOPD classes
//===----------------------------------------------------------------------===//

class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPAnyCommon<outs, ins, asm, []>,
VOP<NAME>,
SIMCInstr<NAME, SIEncodingFamily.GFX11> {
// Fields for table indexing
Instruction Opcode = !cast<Instruction>(NAME);
bits<5> OpX = XasVC.VOPDOp;
bits<5> OpY = YasVC.VOPDOp;

let VALU = 1;

let DecoderNamespace = "GFX11";
let AssemblerPredicate = isGFX11Plus;
let WaveSizePredicate = isWave32;
let isCodeGenOnly = 0;
let SubtargetPredicate = isGFX11Plus;
let AsmMatchConverter = "cvtVOPD";
let Size = 8;
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;

let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
}

class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
}

class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
let Size = 12;
}

// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
// not. Since we generate the DUAL form by converting from the normal form we
// will never generate it.
defvar VOPDYPseudos = [
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
"V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
"V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32"
];
defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index];

def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
let DecoderMethod = "decodeOperandVOPDDstY";
}

foreach x = VOPDXPseudos in {
foreach y = VOPDYPseudos in {
defvar xInst = !cast<VOP_Pseudo>(x);
defvar yInst = !cast<VOP_Pseudo>(y);
defvar XasVC = !cast<VOPD_Component>(x);
defvar YasVC = !cast<VOPD_Component>(y);
defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
// If X or Y is MADK (have a mandatory immediate), all src operands which
// may contain an optional literal must use the VSrc_*_Deferred operand
// type. Optional literal operands in MADK VOPD components always use this
// operand form. If Both X and Y are MADK, the mandatory literal of X
// additionally must use an alternate operand format which defers to the
// 'real' Y literal
defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2);
defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
if !or(isOpXMADK, isOpYMADK) then {
if !and(isOpXMADK, isOpYMADK) then {
defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
} else {
defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
if isOpXMADK then {
assert !not(isOpYMADK), "Expected only OpX as MADK";
defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
} else {
assert !not(isOpXMADK), "Expected only OpY as MADK";
defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
}
}
} else {
defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
}
}
}

@@ -30,6 +30,16 @@ class VOP <string opName> {
string OpName = opName;
}

// First 13 insts from VOPDY are also VOPDX. DOT2ACC_F32_BF16 is omitted
defvar VOPDX_Max_Index = 12;

class VOPD_Component<bits<5> OpIn, string vOPDName> {
Instruction BaseVOP = !cast<Instruction>(NAME);
string VOPDName = "v_dual_" # !substr(vOPDName, 2);
bits<5> VOPDOp = OpIn;
bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index);
}

class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {

@@ -1417,6 +1427,7 @@ include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"
include "VOPDInstructions.td"

class VOPInfoTable <string Format> : GenericTable {

@@ -57,6 +57,10 @@ v_cvt_f16_u16_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
v_cvt_f16_u16_e64_dpp v5, s1 row_shl:1 row_mask:0xf bank_mask:0xf
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

; disallow space between colons
v_dual_mul_f32 v0, v0, v2 : : v_dual_mul_f32 v1, v1, v3
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression

// On GFX11, v_dot8_i32_i4 is a valid SP3 alias for v_dot8_i32_iu4.
// However, we intentionally leave it unimplemented because on other
// processors v_dot8_i32_i4 denotes an instruction of a different

@@ -0,0 +1,86 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s

v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
// GFX11: encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
// GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
// GFX11: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_min_f32 v0, v1 , v2 :: v_dual_max_f32 v3, v4, v5
// GFX11: encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42
// GFX11: encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1
// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99
// GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, v160
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xa0,0x01,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2.741
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_subrev_f32 v0, v1 , v2 :: v_dual_add_nc_u32 v3, v4, v5
// GFX11: encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5
// GFX11: encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_fmamk_f32 v122, 255, 255, v161 :: v_dual_fmamk_f32 v123, 255, 255, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3
// GFX11: encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ;
// GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

;Illegal, but assembler does not check register or literal constraints for VOPD
;v_dual_fmamk_f32 v122, v74, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161

@@ -14571,6 +14571,66 @@
# GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c

# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf

# W32: v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 ; encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14

# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_and_b32 v247, v160, v98 ; encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40

# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_fmamk_f32 v3, v6, 0x402f6c8b, v1 ; encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40

# W32: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 :: v_dual_add_f32 v5, 0xaf123456, v2 ; encoding: [0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf]
0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf

# W32: v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 ; encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f

# W32: v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde

# W32: v_dual_fmamk_f32 v122, 0xff, 0xff, v161 :: v_dual_fmamk_f32 v123, 0xff, 0xff, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00

# W32: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 ; encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0

# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 0x402f6c8b ; encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40

# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 2 ; encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40

# W32: v_dual_min_f32 v0, v1, v2 :: v_dual_max_f32 v3, v4, v5 ; encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00

# W32: v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b ; encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40

# W32: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 ; encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf

# W32: v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24

# W32: v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 ; encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00

# W32: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00

# W32: v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24

# W32: v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24

# W32: v_dual_subrev_f32 v0, v1, v2 :: v_dual_add_nc_u32 v3, v4, v5 ; encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00

# GFX11: v_exp_f32_e32 v255, v1 ; encoding: [0x01,0x4b,0xfe,0x7f]
0x01,0x4b,0xfe,0x7f