[AMDGPU] gfx11 VOPD instructions MC support

VOPD is a new encoding for dual-issue instructions for use in wave32.
This patch includes MC layer support only.

A VOPD instruction consists of an X component (for which there are
13 possible opcodes) and a Y component (for which there are the 13 X
opcodes plus 3 more). Most of the complexity in defining and parsing
a VOPD operation arises from the different possible total numbers of
operands and from the deferred parsing of certain operands, depending
on the constituent X and Y opcodes.

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D128218
This commit is contained in:
Joe Nash 2022-05-25 14:09:11 -04:00
parent 78a31bb969
commit 07b7fada73
14 changed files with 604 additions and 34 deletions

View File

@ -87,6 +87,17 @@ class PredConcat<list<Predicate> lst, Predicate pred> {
!listconcat([pred], !filter(item, lst, !ne(item, pred)));
}
// Add a Register to the list if it does not already exist
// Note: reg is prepended to the result and any existing occurrences of reg
// in lst are filtered out, so each Register appears at most once.
class RegAppend<list<Register> lst, Register reg> {
list<Register> ret =
!listconcat([reg], !filter(item, lst, !ne(item, reg)));
}
// Get the union of two Register lists
// Folds RegAppend over lstB starting from lstA: each lstB element is
// prepended in turn, with duplicates removed. The union is deduplicated;
// element order is not preserved.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
list<Register> ret =
!foldl(lstA, lstB, temp, item, RegAppend<temp, item>.ret);
}
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
Predicate AssemblerPredicate = TruePredicate;

View File

@ -1680,7 +1680,7 @@ private:
bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
AsmToken peekToken();
AsmToken peekToken(bool ShouldSkipSpace = true);
AsmToken getToken() const;
SMLoc getLoc() const;
void lex();
@ -1738,6 +1738,7 @@ public:
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
@ -1804,6 +1805,7 @@ public:
AMDGPUOperand::Ptr defaultWaitVDST() const;
AMDGPUOperand::Ptr defaultWaitEXP() const;
OperandMatchResultTy parseVOPD(OperandVector &Operands);
};
struct OptionalOperand {
@ -2909,7 +2911,8 @@ OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// TODO: add syntactic sugar for 1/(2*PI)
assert(!isRegister());
if (isRegister())
return MatchOperand_NoMatch;
assert(!isModifier());
const auto& Tok = getToken();
@ -5671,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
OperandMode Mode) {
OperandMatchResultTy ResTy = parseVOPD(Operands);
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
isToken(AsmToken::EndOfStatement))
return ResTy;
// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
ResTy = MatchOperandParserImpl(Operands, Mnemonic);
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
@ -7108,9 +7116,10 @@ AMDGPUAsmParser::getToken() const {
return Parser.getTok();
}
AsmToken
AMDGPUAsmParser::peekToken() {
return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
return isToken(AsmToken::EndOfStatement)
? getToken()
: getLexer().peekTok(ShouldSkipSpace);
}
void
@ -8316,6 +8325,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3P(Inst, Operands, OptIdx);
}
//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//
// Parse the VOPD bridge "::" and the OpY mnemonic that follows it.
// Returns NoMatch unless the subtarget supports VOPD and the next two
// tokens are adjacent colons.
OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(getSTI()))
    return MatchOperand_NoMatch;

  // "::" must be two adjacent Colon tokens. peekToken(false) does not skip
  // whitespace, so "v0 : : v1" is correctly rejected.
  if (!isToken(AsmToken::Colon) || !peekToken(false).is(AsmToken::Colon))
    return MatchOperand_NoMatch;

  SMLoc BridgeLoc = getLoc();
  lex(); // consume first ':'
  lex(); // consume second ':'
  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", BridgeLoc));

  // The OpY mnemonic follows as an identifier expression.
  const MCExpr *MnemoExpr;
  if (isToken(AsmToken::Identifier) && !Parser.parseExpression(MnemoExpr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, MnemoExpr, BridgeLoc));
    return MatchOperand_Success;
  }

  Error(BridgeLoc, "invalid VOPD :: usage");
  return MatchOperand_ParseFail;
}
// Create VOPD MCInst operands using parsed assembler operands.
// Parsed VOPD operands are ordered as follows:
// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// If both OpX and OpY have an imm, the first imm has a different name:
// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// MCInst operands have the following order:
// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
// Append parsed operand i to Inst. Tokens produce no MCInst operand.
auto addOp = [&](uint16_t i) { // NOLINT:function pointer
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
return;
}
if (Op.isImm()) {
Op.addImmOperands(Inst, 1);
return;
}
// Handle tokens like 'offen' which are sometimes hard-coded into the
// asm string. There are no MCInst operands for these.
if (Op.isToken()) {
return;
}
llvm_unreachable("Unhandled operand type in cvtVOPD");
};
// Indices into MCInst.Operands
const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ...
unsigned Opc = Inst.getOpcode();
bool HasVsrc1X =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
// OpX carries an immediate either when the opcode has an immDeferred
// operand (both components are fmaak/fmamk) or when its plain 'imm' sits
// at one of the OpX MC positions (only OpX is fmaak/fmamk).
bool HasImmX =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
(HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
FmamkOpXImmMCIndex ||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
FmaakOpXImmMCIndex));
bool HasVsrc1Y =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
// OpY carries an immediate when immDeferred exists (OpX's literal then
// uses the immDeferred name) or when 'imm' lies beyond all OpX operands.
bool HasImmY =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
MinOpYImmMCIndex + HasVsrc1X;
// Indices of parsed operands relative to dst
const auto DstIdx = 0;
const auto Src0Idx = 1;
const auto Vsrc1OrImmIdx = 2;
// Parsed OpX operand count: dst + src0, plus optional imm and vsrc1.
const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
// Offsets into parsed operands; parsed operand 0 is the OpX mnemonic.
const auto OpXFirstOperandOffset = 1;
const auto OpYFirstOperandOffset =
OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
// Order of addOp calls determines MC operand order
addOp(OpXFirstOperandOffset + DstIdx); // vdstX
addOp(OpYFirstOperandOffset + DstIdx); // vdstY
addOp(OpXFirstOperandOffset + Src0Idx); // src0X
if (HasImmX) {
// immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
} else {
if (HasVsrc1X) // all except v_mov
addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
}
addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
if (HasImmY) {
// immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
} else {
if (HasVsrc1Y) // all except v_mov
addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
}
}
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

View File

@ -288,6 +288,12 @@ decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}
// Decoder callback for the vdstY field of a VOPD instruction. Delegates to
// the disassembler, which reconstructs the register's implicit low bit from
// the already-decoded vdstX operand.
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);
  MCOperand DstY = Disasm->decodeVOPDDstYOp(Inst, Val);
  return addOperand(Inst, DstY);
}
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
@ -448,6 +454,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOPCDPPInst(MI);
break;
}
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
if (Res)
break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@ -971,6 +980,8 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
insertNamedMCOperand(MI, MCOperand::createImm(Literal),
AMDGPU::OpName::immDeferred);
assert(DescNumOps == MI.getNumOperands());
for (unsigned I = 0; I < DescNumOps; ++I) {
auto &Op = MI.getOperand(I);
@ -1213,6 +1224,9 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {
assert(
AMDGPU::hasVOPD(STI) &&
"Should only decode multiple kimm with VOPD, check VSrc operand types");
if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
@ -1505,6 +1519,20 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
llvm_unreachable("unknown dst register");
}
// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
unsigned Val) const {
// vdstX must already have been decoded into Inst before this runs.
int VDstXInd =
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
assert(VDstXInd != -1);
assert(Inst.getOperand(VDstXInd).isReg());
unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
// Force bit 0 to the complement of vdstX's bit 0. Assumes Val arrives with
// bit 0 clear (the encoding only stores vdstY{7-1}) — TODO confirm the
// generated decoder passes the field shifted into bits 7-1.
Val |= ~XDstReg & 1;
auto Width = llvm::AMDGPUDisassembler::OPW32;
return createRegOperand(getVgprClassId(Width), Val);
}
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;

View File

@ -233,6 +233,7 @@ public:
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;

View File

@ -2074,6 +2074,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
// Builds the assembly string for one VOPD component, e.g.
// "$vdstX, $src0X, $vsrc1X". XorY is "X" or "Y" and suffixes each operand
// name; operands beyond NumSrcArgs are omitted (v_mov has only src0).
class getAsmVOPDPart <int NumSrcArgs, string XorY> {
string dst = "$vdst" # XorY;
string src0 = ", $src0" # XorY;
string src1 = ", $vsrc1" # XorY;
string ret = dst #
!if(!ge(NumSrcArgs, 1), src0, "") #
!if(!ge(NumSrcArgs, 2), src1, "");
}
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
@ -2513,6 +2522,14 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
// It is a slight misnomer to use the deferred f32 operand type for non-float
// operands, but this operand type will only be used if the other dual
// component is FMAAK or FMAMK
field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
@ -2536,6 +2553,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
field string TieRegDPP = "$old";
}

View File

@ -1783,6 +1783,10 @@ bool hasMAIInsts(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
}
// True when the subtarget implements the VOPD dual-issue encoding.
bool hasVOPD(const MCSubtargetInfo &STI) {
  const auto &Features = STI.getFeatureBits();
  return Features[AMDGPU::FeatureVOPD];
}
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
int32_t ArgNumVGPR) {
if (has90AInsts && ArgNumAGPR)

View File

@ -823,6 +823,7 @@ bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
/// Is Reg - scalar register

View File

@ -110,13 +110,17 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
}
multiclass VOP1Inst <string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
SDPatternOperator node = null_frag, int VOPDOp = -1> {
// We only want to set this on the basic, non-SDWA or DPP forms.
defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
!eq(opName, "v_mov_b64"));
let isMoveImm = should_mov_imm in {
def _e32 : VOP1_Pseudo <opName, P>;
if !eq(VOPDOp, -1) then
def _e32 : VOP1_Pseudo <opName, P>;
else
// Only for V_MOV_B32
def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
def _e64 : VOP3InstBase <opName, P, node>;
}
@ -182,8 +186,15 @@ let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}
// Profile for V_MOV_B32 as a VOPD component: a move has a single source,
// so the vsrc1X/vsrc1Y operands are dropped from all VOPD input lists.
def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDX = (ins Src0RC32:$src0X);
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
let SubtargetPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;

View File

@ -140,7 +140,13 @@ multiclass VOP2Inst_e32<string opName,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
} // End renamedInGFX9 = GFX9Renamed
}
// Same as VOP2Inst_e32 but additionally tags the generated records with
// VOPD_Component, recording the VOPD opcode number and v_dual_* mnemonic.
multiclass
VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
string VOPDName, SDPatternOperator node = null_frag,
string revOp = opName, bit GFX9Renamed = 0> {
defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOPD_Component<VOPDOp, VOPDName>;
}
multiclass VOP2Inst_e64<string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@ -180,6 +186,22 @@ multiclass VOP2Inst<string opName,
}
}
// VOPD-capable variant of VOP2Inst: identical expansion (e32, e64, sdwa,
// dpp) except the e32 form carries VOPD_Component info so it can be paired
// into a dual-issue V_DUAL_* instruction.
multiclass VOP2Inst_VOPD<string opName,
VOPProfile P,
bits<5> VOPDOp,
string VOPDName,
SDPatternOperator node = null_frag,
string revOp = opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
multiclass VOP2bInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@ -230,16 +252,19 @@ multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
}
}
multiclass VOP2eInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName,
bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
multiclass
VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
SDPatternOperator node, string revOp, bit useSGPRInput> {
let SchedRW = [Write32Bit] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
if !eq(VOPDOp, -1) then
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
else
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
VOPD_Component<VOPDOp, VOPDName>;
foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
@ -262,6 +287,16 @@ multiclass VOP2eInst <string opName,
}
}
// Non-VOPD VOP2e form: VOPDOp = -1 is the sentinel for "not a VOPD
// component", so the base multiclass skips the VOPD_Component mixin.
multiclass
VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
: VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;
// VOPD-capable VOP2e form: forwards the VOPD opcode number and v_dual_*
// base mnemonic to VOP2eInst_Base, which attaches VOPD_Component to _e32.
multiclass
VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
SDPatternOperator node = null_frag, string revOp = opName,
bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
: VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
@ -283,12 +318,24 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
}
}
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
// Common base for the fmaak/fmamk (mandatory-literal) profiles. Declares
// AsmVOPDXDeferred unset here so derived MADK profiles can override it with
// the asm string used when both VOPD components carry a literal.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
string AsmVOPDXDeferred = ?;
}
// Profile for v_fmaak-style ops: literal comes last (src0, src1, imm).
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
(ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
// Note that both src0X and imm are deferred
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
field string Asm32 = "$vdst, $src0, $src1, $imm";
field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
// Used when OpY is also MADK: OpX's literal is renamed immDeferred so the
// two literals stay distinct in the MCInst.
let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
field bit HasExt = 0;
let IsSingle = 1;
}
@ -296,10 +343,17 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
// Profile for v_fmamk-style ops: literal sits between the two vector
// sources (src0, imm, src1), unlike VOP_MADAK where it comes last.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
// immDeferred form is used when OpY is also MADK; keeps both literals
// distinct in the MCInst.
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
field string Asm32 = "$vdst, $src0, $imm, $src1";
field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
field bit HasExt = 0;
let IsSingle = 1;
}
@ -537,31 +591,31 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1
@ -593,7 +647,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}
@ -783,7 +837,7 @@ let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts
@ -811,7 +865,7 @@ let Constraints = "$vdst = $src2",
isCommutable = 1,
IsDOT = 1 in {
let SubtargetPredicate = HasDot5Insts in
defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
let SubtargetPredicate = HasDot6Insts in
defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
@ -849,10 +903,10 @@ let AddedComplexity = 30 in {
} // End AddedComplexity = 30
let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
}
let SubtargetPredicate = isGFX10Plus in {

View File

@ -0,0 +1,159 @@
//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Encodings
//===----------------------------------------------------------------------===//
// 64-bit VOPD encoding: X component in the low dword, Y in the high dword.
// Only vdstY{7-1} is encoded (bits 55-49); bit 0 of vdstY is implied as the
// opposite of bit 0 of vdstX (see decodeVOPDDstYOp).
class VOPDe<bits<4> opX, bits<5> opY> : Enc64 {
bits<9> src0X;
bits<8> vsrc1X;
bits<8> vdstX;
bits<9> src0Y;
bits<8> vsrc1Y;
bits<8> vdstY;
let Inst{8-0} = src0X;
let Inst{16-9} = vsrc1X;
let Inst{21-17} = opY;
let Inst{25-22} = opX;
let Inst{31-26} = 0x32; // encoding
let Inst{40-32} = src0Y;
let Inst{48-41} = vsrc1Y;
let Inst{55-49} = vdstY{7-1};
let Inst{63-56} = vdstX;
}
// 96-bit VOPD encoding used when a component is fmaak/fmamk: identical to
// VOPDe in the low 64 bits, with the mandatory 32-bit literal appended in
// bits 95-64.
class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
bits<9> src0X;
bits<8> vsrc1X;
bits<8> vdstX;
bits<9> src0Y;
bits<8> vsrc1Y;
bits<8> vdstY;
bits<32> imm;
let Inst{8-0} = src0X;
let Inst{16-9} = vsrc1X;
let Inst{21-17} = opY;
let Inst{25-22} = opX;
let Inst{31-26} = 0x32; // encoding
let Inst{40-32} = src0Y;
let Inst{48-41} = vsrc1Y;
let Inst{55-49} = vdstY{7-1};
let Inst{63-56} = vdstX;
let Inst{95-64} = imm;
}
//===----------------------------------------------------------------------===//
// VOPD classes
//===----------------------------------------------------------------------===//
// Common properties of a VOPD instruction. VDX/VDY are the component
// pseudos whose register uses/defs, mode-register behavior, and scheduling
// info are merged; XasVC/YasVC supply the component opcode numbers for the
// table-indexing fields.
class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPAnyCommon<outs, ins, asm, []>,
VOP<NAME>,
SIMCInstr<NAME, SIEncodingFamily.GFX11> {
// Fields for table indexing
Instruction Opcode = !cast<Instruction>(NAME);
bits<5> OpX = XasVC.VOPDOp;
bits<5> OpY = YasVC.VOPDOp;
let VALU = 1;
let DecoderNamespace = "GFX11";
let AssemblerPredicate = isGFX11Plus;
// VOPD is a wave32-only dual-issue encoding.
let WaveSizePredicate = isWave32;
let isCodeGenOnly = 0;
let SubtargetPredicate = isGFX11Plus;
let AsmMatchConverter = "cvtVOPD";
let Size = 8;
// Reads MODE if either component does; ditto for FP exceptions.
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;
// Merge (deduplicated) implicit uses/defs of both components.
let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
}
// 64-bit (no-literal) VOPD instruction. v_mov_b32 components have no vsrc1
// operand, so their vsrc1 encoding field is forced to zero.
class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
}
// 96-bit VOPD instruction (a component is fmaak/fmamk): same vsrc1 zeroing
// for v_mov_b32 components as VOPD, plus 4 extra bytes for the literal.
class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
VOPD_Component XasVC, VOPD_Component YasVC>
: VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
let Size = 12;
}
// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
// not. Since we generate the DUAL form by converting from the normal form we
// will never generate it.
// All pseudos legal as the Y component, in VOPD opcode order. The first
// VOPDX_Max_Index+1 entries (indices 0..12) are also legal as X; the slice
// below selects them.
defvar VOPDYPseudos = [
"V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
"V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
"V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
"V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32"
];
defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index];
// vdstY operand: printed like a regular VGPR but decoded via a custom
// method that reconstructs the un-encoded low bit from vdstX.
def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
let DecoderMethod = "decodeOperandVOPDDstY";
}
// Generate one V_DUAL_<X>_X_<Y> record for every (X, Y) opcode pair.
// Pairs involving fmaak/fmamk use the 96-bit VOPD_MADK encoding and the
// deferred operand/asm forms described below.
foreach x = VOPDXPseudos in {
foreach y = VOPDYPseudos in {
defvar xInst = !cast<VOP_Pseudo>(x);
defvar yInst = !cast<VOP_Pseudo>(y);
defvar XasVC = !cast<VOPD_Component>(x);
defvar YasVC = !cast<VOPD_Component>(y);
defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
// If X or Y is MADK (have a mandatory immediate), all src operands which
// may contain an optional literal must use the VSrc_*_Deferred operand
// type. Optional literal operands in MADK VOPD components always use this
// operand form. If Both X and Y are MADK, the mandatory literal of X
// additionally must use an alternate operand format which defers to the
// 'real' Y literal
defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
// Record name, e.g. "V_DUAL_ADD_F32_X_MUL_F32_e32" (leading "V_" stripped
// from each component pseudo name).
defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2);
defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
if !or(isOpXMADK, isOpYMADK) then {
if !and(isOpXMADK, isOpYMADK) then {
// Both components carry literals: X uses the immDeferred forms.
defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
} else {
defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
if isOpXMADK then {
assert !not(isOpYMADK), "Expected only OpX as MADK";
defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
} else {
assert !not(isOpXMADK), "Expected only OpY as MADK";
defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
}
}
} else {
// Neither component has a mandatory literal: plain 64-bit encoding.
defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
}
}
}

View File

@ -30,6 +30,16 @@ class VOP <string opName> {
string OpName = opName;
}
// First 13 insts from VOPDY are also VOPDX. DOT2ACC_F32_BF16 is omitted
defvar VOPDX_Max_Index = 12;
// Mixin attached to a VOP pseudo usable as a VOPD component. Records the
// VOPD opcode number and derives the v_dual_* mnemonic by replacing the
// leading "v_" of vOPDName.
class VOPD_Component<bits<5> OpIn, string vOPDName> {
Instruction BaseVOP = !cast<Instruction>(NAME);
string VOPDName = "v_dual_" # !substr(vOPDName, 2);
bits<5> VOPDOp = OpIn;
// Only opcodes 0..VOPDX_Max_Index may be used as the X component.
bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index);
}
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
@ -1417,6 +1427,7 @@ include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"
include "VOPDInstructions.td"
class VOPInfoTable <string Format> : GenericTable {

View File

@ -57,6 +57,10 @@ v_cvt_f16_u16_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
v_cvt_f16_u16_e64_dpp v5, s1 row_shl:1 row_mask:0xf bank_mask:0xf
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
; disallow space between colons
v_dual_mul_f32 v0, v0, v2 : : v_dual_mul_f32 v1, v1, v3
// GFX11: [[@LINE-1]]:{{[0-9]+}}: error: unknown token in expression
// On GFX11, v_dot8_i32_i4 is a valid SP3 alias for v_dot8_i32_iu4.
// However, we intentionally leave it unimplemented because on other
// processors v_dot8_i32_i4 denotes an instruction of a different

View File

@ -0,0 +1,86 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s
// VOPD dual-issue syntax is "<X component> :: <Y component>". These
// instructions are wave32-only, so each case below is also assembled
// under forced wave64 and must be rejected there (W64-ERR lines).
v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
// GFX11: encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// A literal src operand grows the encoding to 12 bytes, with the 32-bit
// literal in the trailing dword (0x24681357 little-endian below).
v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
// GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// Both components may use the same literal; only one copy is encoded.
v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
// GFX11: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_min_f32 v0, v1 , v2 :: v_dual_max_f32 v3, v4, v5
// GFX11: encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42
// GFX11: encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// fmamk/fmaak components take an extra constant K, encoded as the
// trailing literal dword.
v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1
// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99
// GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// The literal-bearing component may be either X or Y.
v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, v160
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xa0,0x01,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2.741
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// 2 is an inline constant (src encoding 0x82), not a literal.
v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_mov_b32 v247, 2
// GFX11: encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_subrev_f32 v0, v1 , v2 :: v_dual_add_nc_u32 v3, v4, v5
// GFX11: encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5
// GFX11: encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
// Identical literals across components share the single literal slot.
v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_fmamk_f32 v122, 255, 255, v161 :: v_dual_fmamk_f32 v123, 255, 255, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3
// GFX11: encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ;
// GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error
;Illegal, but assembler does not check register or literal constraints for VOPD
;v_dual_fmamk_f32 v122, v74, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161

View File

@ -14571,6 +14571,66 @@
# GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
# VOPD dual-issue decodings (wave32, W32 prefix): each encoding
# disassembles to "<X component> :: <Y component>"; 12-byte forms carry a
# 32-bit literal (or fmamk/fmaak K constant) in the trailing dword, and a
# single literal dword serves both components when they reference the
# same value.
# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
# W32: v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 ; encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14]
0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14
# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_and_b32 v247, v160, v98 ; encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_fmamk_f32 v3, v6, 0x402f6c8b, v1 ; encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40
# W32: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 :: v_dual_add_f32 v5, 0xaf123456, v2 ; encoding: [0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf]
0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf
# W32: v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 ; encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f]
0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f
# W32: v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde
# W32: v_dual_fmamk_f32 v122, 0xff, 0xff, v161 :: v_dual_fmamk_f32 v123, 0xff, 0xff, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00]
0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00
# W32: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 ; encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0]
0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0
# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 0x402f6c8b ; encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 2 ; encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40
# W32: v_dual_min_f32 v0, v1, v2 :: v_dual_max_f32 v3, v4, v5 ; encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00]
0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00
# W32: v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b ; encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40
# W32: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 ; encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf]
0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf
# W32: v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
# W32: v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 ; encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00
# W32: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00]
0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00
# W32: v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
# W32: v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24
# W32: v_dual_subrev_f32 v0, v1, v2 :: v_dual_add_nc_u32 v3, v4, v5 ; encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00]
0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00
# GFX11: v_exp_f32_e32 v255, v1 ; encoding: [0x01,0x4b,0xfe,0x7f]
0x01,0x4b,0xfe,0x7f