[AMDGPU] Support shared literals in FMAMK/FMAAK

These instructions should allow src0 to be a literal with the same value as the mandatory other literal. Enable it by introducing an operand that defers adding its value to the MI when decoding till the mandatory literal is parsed. Reviewed By: dp, foad Differential Revision: https://reviews.llvm.org/D111067 Change-Id: I22b0ae0d35bad17b6f976808e48bffe9a6af70b7
2021-10-04 10:56:30 -04:00 · 2021-10-04 10:56:30 -04:00 · b4b7e605a6
parent b41cfbfcbb
commit b4b7e605a6
19 changed files with 300 additions and 73 deletions
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@ -1542,7 +1542,7 @@ private:
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
-  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
+  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
-    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
+    case AMDGPU::OPERAND_REG_IMM_V2INT32:
+    case AMDGPU::OPERAND_KIMM32:
+    case AMDGPU::OPERAND_KIMM16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
+  case AMDGPU::OPERAND_KIMM32:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+    setImmKindNone();
+    return;
+  case AMDGPU::OPERAND_KIMM16:
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
+    setImmKindNone();
+    return;
  default:
    llvm_unreachable("invalid operand size");
  }
@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
-      ++ConstantBusUseCount;
+      ++NumLiterals;
+      LiteralSize = 4;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,

          // An instruction may use only one literal.
          // This has been validated on the previous step.
-          // See validateVOP3Literal.
+          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
 }

-// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
-                                          const OperandVector &Operands) {
+// One unique literal can be used. VOP3 literal is only allowed in GFX10+
+bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
+                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
-  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
+      ImmIdx == -1)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

-  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
-    if (OpIdx == -1) break;
+    if (OpIdx == -1)
+      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
  if (!NumLiterals)
    return true;

-  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }
@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
      "only one literal operand is allowed");
    return false;
  }
-  if (!validateVOP3Literal(Inst, Operands)) {
+  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@ -26,6 +26,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"

 using namespace llvm;
@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
 }

+static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr, const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
+}
+
+static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
+                                                 uint64_t Addr,
+                                                 const void *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(
+      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
+}
+
 static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
    }
  }

+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (Res && ImmLitIdx != -1)
+    Res = convertFMAanyK(MI, ImmLitIdx);
+
  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
@ -810,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  return MCDisassembler::Success;
 }

+DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
+                                                int ImmLitIdx) const {
+  assert(HasLiteral && "Should have decoded a literal");
+  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+  unsigned DescNumOps = Desc.getNumOperands();
+  assert(DescNumOps == MI.getNumOperands());
+  for (unsigned I = 0; I < DescNumOps; ++I) {
+    auto &Op = MI.getOperand(I);
+    auto OpType = Desc.OpInfo[I].OperandType;
+    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
+                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
+    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
+        IsDeferredOp)
+      Op.setImm(Literal);
+  }
+  return MCDisassembler::Success;
+}
+
 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@ -1019,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
 }

+// Decode Literals for insts which always have a literal in the encoding
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
+  if (HasLiteral) {
+    if (Literal != Val)
+      return errOperand(Val, "More than one unique literal is illegal");
+  }
+  HasLiteral = true;
+  Literal = Val;
+  return MCOperand::createImm(Literal);
+}
+
 MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
@ -1232,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
 }

-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                                          bool MandatoryLiteral) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10
@ -1261,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

-  if (Val == LITERAL_CONST)
-    return decodeLiteralConstant();
+  if (Val == LITERAL_CONST) {
+    if (MandatoryLiteral)
+      // Keep a sentinel value for deferred setting
+      return MCOperand::createImm(LITERAL_CONST);
+    else
+      return decodeLiteralConstant();
+  }

  switch (Width) {
  case OPW32:
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@ -87,6 +87,7 @@ public:
  DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
                                       raw_string_ostream &KdStream) const;

+  DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
  DecodeStatus convertSDWAInst(MCInst &MI) const;
  DecodeStatus convertDPP8Inst(MCInst &MI) const;
  DecodeStatus convertMIMGInst(MCInst &MI) const;
@ -150,9 +151,11 @@ public:

  static MCOperand decodeIntImmed(unsigned Imm);
  static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+  MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
  MCOperand decodeLiteralConstant() const;

-  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
+                        bool MandatoryLiteral = false) const;
  MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
  MCOperand decodeSpecialReg32(unsigned Val) const;
  MCOperand decodeSpecialReg64(unsigned Val) const;
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
+    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
      printImmediate16(Op.getImm(), STI, O);
      break;
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    // FIXME Is this correct? What do inline immediates do on SI for f16 src
@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
    uint32_t Encoding = getLit16Encoding(Lo16, STI);
    return Encoding;
  }
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16:
+    return MO.getImm();
  default:
    llvm_unreachable("invalid operand size");
  }
@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
      (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
    return;

-  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+  // Do not print literals from SISrc Operands for insts with mandatory literals
+  int ImmLitIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+  if (ImmLitIdx != -1)
+    return;
+
+  // Check for additional literals
  for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {

    // Check if this operand should be encoded as [SV]Src
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@ -139,64 +139,67 @@ enum ClassFlags : unsigned {
 }

 namespace AMDGPU {
-  enum OperandType : unsigned {
-    /// Operands with register or 32-bit immediate
-    OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
-    OPERAND_REG_IMM_INT64,
-    OPERAND_REG_IMM_INT16,
-    OPERAND_REG_IMM_FP32,
-    OPERAND_REG_IMM_FP64,
-    OPERAND_REG_IMM_FP16,
-    OPERAND_REG_IMM_V2FP16,
-    OPERAND_REG_IMM_V2INT16,
-    OPERAND_REG_IMM_V2INT32,
-    OPERAND_REG_IMM_V2FP32,
+enum OperandType : unsigned {
+  /// Operands with register or 32-bit immediate
+  OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+  OPERAND_REG_IMM_INT64,
+  OPERAND_REG_IMM_INT16,
+  OPERAND_REG_IMM_FP32,
+  OPERAND_REG_IMM_FP64,
+  OPERAND_REG_IMM_FP16,
+  OPERAND_REG_IMM_FP16_DEFERRED,
+  OPERAND_REG_IMM_FP32_DEFERRED,
+  OPERAND_REG_IMM_V2FP16,
+  OPERAND_REG_IMM_V2INT16,
+  OPERAND_REG_IMM_V2INT32,
+  OPERAND_REG_IMM_V2FP32,

-    /// Operands with register or inline constant
-    OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_INT32,
-    OPERAND_REG_INLINE_C_INT64,
-    OPERAND_REG_INLINE_C_FP16,
-    OPERAND_REG_INLINE_C_FP32,
-    OPERAND_REG_INLINE_C_FP64,
-    OPERAND_REG_INLINE_C_V2INT16,
-    OPERAND_REG_INLINE_C_V2FP16,
-    OPERAND_REG_INLINE_C_V2INT32,
-    OPERAND_REG_INLINE_C_V2FP32,
+  /// Operands with register or inline constant
+  OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_INT32,
+  OPERAND_REG_INLINE_C_INT64,
+  OPERAND_REG_INLINE_C_FP16,
+  OPERAND_REG_INLINE_C_FP32,
+  OPERAND_REG_INLINE_C_FP64,
+  OPERAND_REG_INLINE_C_V2INT16,
+  OPERAND_REG_INLINE_C_V2FP16,
+  OPERAND_REG_INLINE_C_V2INT32,
+  OPERAND_REG_INLINE_C_V2FP32,

-    /// Operands with an AccVGPR register or inline constant
-    OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_INT32,
-    OPERAND_REG_INLINE_AC_FP16,
-    OPERAND_REG_INLINE_AC_FP32,
-    OPERAND_REG_INLINE_AC_FP64,
-    OPERAND_REG_INLINE_AC_V2INT16,
-    OPERAND_REG_INLINE_AC_V2FP16,
-    OPERAND_REG_INLINE_AC_V2INT32,
-    OPERAND_REG_INLINE_AC_V2FP32,
+  /// Operand with 32-bit immediate that uses the constant bus.
+  OPERAND_KIMM32,
+  OPERAND_KIMM16,

-    OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
+  /// Operands with an AccVGPR register or inline constant
+  OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_INT32,
+  OPERAND_REG_INLINE_AC_FP16,
+  OPERAND_REG_INLINE_AC_FP32,
+  OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_V2INT16,
+  OPERAND_REG_INLINE_AC_V2FP16,
+  OPERAND_REG_INLINE_AC_V2INT32,
+  OPERAND_REG_INLINE_AC_V2FP32,

-    OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+  OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,

-    OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
-    OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+  OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+  OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,

-    OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+  OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,

-    // Operand for source modifiers for VOP instructions
-    OPERAND_INPUT_MODS,
+  OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+  OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,

-    // Operand for SDWA instructions
-    OPERAND_SDWA_VOPC_DST,
+  // Operand for source modifiers for VOP instructions
+  OPERAND_INPUT_MODS,

-    /// Operand with 32-bit immediate that uses the constant bus.
-    OPERAND_KIMM32,
-    OPERAND_KIMM16
-  };
+  // Operand for SDWA instructions
+  OPERAND_SDWA_VOPC_DST
+
+};
 }

 // Input operand modifiers bit-masks
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@ -3405,6 +3405,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
@ -3443,6 +3444,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
    // This suffers the same problem as the scalar 16-bit cases.
    return AMDGPU::isInlinableIntLiteralV216(Imm);
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
@ -3836,6 +3838,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
      break;
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
      break;
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@ -1173,6 +1173,7 @@ class kimmOperand<ValueType vt> : Operand<vt> {
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+  let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
 }

 // 32-bit VALU immediate operand that uses the constant bus.
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@ -1019,6 +1019,30 @@ def VSrc_128 : RegisterOperand<VReg_128> {
  let DecoderMethod = "DecodeVS_128RegisterClass";
 }

+//===----------------------------------------------------------------------===//
+//  VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
+//  with FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
+                           string rc_suffix = "_32"> {
+  let OperandNamespace = "AMDGPU" in {
+    def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP16_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+      let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
+    }
+
+    def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+      let OperandType = opType#"_FP32_DEFERRED";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+      let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
+    }
+  }
+}
+
+defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
+
 //===----------------------------------------------------------------------===//
 //  VRegSrc_* Operands with a VGPR
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@ -1574,8 +1574,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@ -789,6 +789,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@ -797,6 +798,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_KIMM32:
+  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
@ -808,6 +811,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@ -270,12 +270,11 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
 class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
-                        (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
-                        (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
+                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $src1, $imm";
 }

 def VOP_MADAK_F16 : VOP_MADAK <f16>;
@ -283,11 +282,10 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;

 class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
-  field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field bit HasExt = 0;
  let IsSingle = 1;
-
-  field string Asm32 = "$vdst, $src0, $imm, $src1";
 }

 def VOP_MADMK_F16 : VOP_MADMK <f16>;
--- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s
@ -274,6 +274,26 @@ v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7]
 // GFX6-7: error: dpp variant of this instruction is not supported
 // GFX8-9: error: not a valid operand

+//===----------------------------------------------------------------------===//
+// VOP2
+//===----------------------------------------------------------------------===//
+
+v_fmaak_f32 v0, 0xff32ff, v0, 0x11213141
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32ff, 0x11213141, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmaak_f32 v0, 0xff32, v0, 0x1122
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
+v_fmamk_f32 v0, 0xff32, 0x1122, v0
+// GFX6-9: error: instruction not supported on this GPU
+// GFX10: error: only one literal operand is allowed
+
 //===----------------------------------------------------------------------===//
 // VOP2 E64.
 //===----------------------------------------------------------------------===//
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@ -10229,9 +10229,15 @@ v_fmamk_f32 v5, v1, 0xa1b1c1d1, v3
 v_fmamk_f32 v5, v1, 0x11213141, v255
 // GFX10: encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11]

+v_fmamk_f32 v5, 0x11213141, 0x11213141, v255
+// GFX10: encoding: [0xff,0xfe,0x0b,0x58,0x41,0x31,0x21,0x11]
+
 v_fmaak_f32 v5, v1, v2, 0x11213141
 // GFX10: encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11]

+v_fmaak_f32 v5, 0x11213141, v2, 0x11213141
+// GFX10: encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+
 v_fmaak_f32 v255, v1, v2, 0x11213141
 // GFX10: encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11]

@ -11969,6 +11975,9 @@ v_fmamk_f16 v5, v1, 0x1121, v3
 v_fmamk_f16 v255, v1, 0x1121, v3
 // GFX10: encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]

+v_fmamk_f16 v255, 0x1121, 0x1121, v3
+// GFX10: encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+
 v_fmamk_f16 v5, v255, 0x1121, v3
 // GFX10: encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00]

@ -12014,6 +12023,9 @@ v_fmaak_f16 v5, -4.0, v2, 0x1121
 v_fmaak_f16 v5, v1, v255, 0x1121
 // GFX10: encoding: [0x01,0xff,0x0b,0x70,0x21,0x11,0x00,0x00]

+v_fmaak_f16 v5, 0x1121, v255, 0x1121
+// GFX10: encoding: [0xff,0xfe,0x0b,0x70,0x21,0x11,0x00,0x00]
+
 v_fmaak_f16 v5, v1, v2, 0xa1b1
 // GFX10: encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00]

--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s
@ -2337,6 +2337,9 @@ v_madmk_f16 v5, v1, 0xa1b1, v3
 v_madmk_f16 v5, v1, 0x1121, v255
 // CHECK: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]

+v_madmk_f16 v5, 0x1121, 0x1121, v255
+// CHECK: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+
 v_madak_f16 v5, v1, v2, 0x1121
 // CHECK: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]

@ -2367,6 +2370,9 @@ v_madak_f16 v5, v1, v255, 0x1121
 v_madak_f16 v5, v1, v2, 0xa1b1
 // CHECK: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]

+v_madak_f16 v5, 0x1121, v2, 0x1121
+// CHECK: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+
 v_add_u16 v5, v1, v2
 // CHECK: [0x01,0x05,0x0a,0x4c]

--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@ -843,6 +843,20 @@ v_madak_f32 v0, shared_base, v0, 0x11213141
 // NOGCN: error: invalid operand (violates constant bus restrictions)
 v_madak_f32 v0, scc, v0, 0x11213141

+// NOGCN: error: only one literal operand is allowed
+v_madak_f32 v0, 0xff32ff, v0, 0x11213141
+
+// NOGCN: error: only one literal operand is allowed
+v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madak_f16 v0, 0xff32, v0, 0x1122
+
+// NOSICI: error: instruction not supported on this GPU
+// NOGFX89: error: only one literal operand is allowed
+v_madmk_f16 v0, 0xff32, 0x1122, v0
+
 // NOSICIVI: error: register not available on this GPU
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, private_limit
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@ -270,6 +270,14 @@ v_madmk_f32 v1, v2, 64.0, v3
 // VI:   v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
 v_madak_f32 v1, v2, v3, 64.0

+// SICI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x42,0x41,0x31,0x21,0x11]
+// VI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+v_madak_f32 v0, 0x11213141, v0, 0x11213141
+
+// SICI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x40,0x41,0x31,0x21,0x11]
+// VI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+v_madmk_f32 v0, 0x11213141, 0x11213141, v0
+
 // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
 // VI:   v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
 v_bcnt_u32_b32_e64 v1, v2, v3
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@ -73781,6 +73781,9 @@
 # GFX10: v_fmaak_f16 v5, -1, v2, 0x1121          ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
 0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00

+# GFX10: v_fmaak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00
+
 # GFX10: v_fmaak_f32 v5, -1, v2, 0x11213141      ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
 0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11

@ -73796,6 +73799,9 @@
 # GFX10: v_fmaak_f32 v5, 0, v2, 0x11213141       ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
 0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11

+# GFX10: v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 ; encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11]
+0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11
+
 # GFX10: v_fmaak_f16 v5, 0.5, v2, 0x1121         ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00]
 0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00

@ -74150,6 +74156,9 @@
 # GFX10: v_fmamk_f16 v255, v1, 0x1121, v3        ; encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00]
 0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00

+# GFX10: v_fmamk_f16 v255, 0x1121, 0x1121, v3    ; encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00]
+0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00
+
 # GFX10: v_fmamk_f32 v255, v1, 0x11213141, v3    ; encoding: [0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11]
 0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11

@ -74159,6 +74168,9 @@
 # GFX10: v_fmamk_f32 v5, -1, 0x11213141, v3      ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
 0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11

+# GFX10: v_fmamk_f32 v5, 0x11213141, 0x11213141, v3 ; encoding: [0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11]
+0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11
+
 # GFX10: v_fmamk_f16 v5, -4.0, 0x1121, v3        ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00]
 0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00

--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
@ -32160,6 +32160,9 @@
 # CHECK: v_madmk_f32 v5, v1, 0x11213141, v255    ; encoding: [0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11]
 0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11

+# CHECK: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11
+
 # CHECK: v_madak_f32 v5, v1, v2, 0x11213141      ; encoding: [0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11]
 0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11

@ -32187,6 +32190,9 @@
 # CHECK: v_madak_f32 v5, v1, v2, 0xa1b1c1d1      ; encoding: [0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1]
 0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1

+# CHECK: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11]
+0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11
+
 # CHECK: v_add_co_u32_e32 v5, vcc, v1, v2        ; encoding: [0x01,0x05,0x0a,0x32]
 0x01,0x05,0x0a,0x32

@ -33783,6 +33789,9 @@
 # CHECK: v_madmk_f16 v5, v1, 0x1121, v255        ; encoding: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00]
 0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00

+# CHECK: v_madmk_f16 v5, 0x1121, 0x1121, v255    ; encoding: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00]
+0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00
+
 # CHECK: v_madak_f16 v5, v1, v2, 0x1121          ; encoding: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00]
 0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00

@ -33810,6 +33819,9 @@
 # CHECK: v_madak_f16 v5, v1, v2, 0xa1b1          ; encoding: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00]
 0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00

+# CHECK: v_madak_f16 v5, 0x1121, v2, 0x1121      ; encoding: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00]
+0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00
+
 # CHECK: v_add_u16_e32 v5, v1, v2                ; encoding: [0x01,0x05,0x0a,0x4c]
 0x01,0x05,0x0a,0x4c