[AMDGPU] Reorganize VOP3P encoding

This changes the widths of the encoding and opcode fields to match the
documentation.

Differential Revision: https://reviews.llvm.org/D88619
This commit is contained in:
Stanislav Mekhanoshin 2020-09-30 15:01:33 -07:00
parent 7475bd5411
commit 722d792499
2 changed files with 94 additions and 94 deletions

View File

@ -418,7 +418,7 @@ def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F3
def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
multiclass VOP3P_Real_vi<bits<10> op> {
multiclass VOP3P_Real_vi<bits<7> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicate = HasVOP3PInsts;
@ -426,7 +426,7 @@ multiclass VOP3P_Real_vi<bits<10> op> {
}
}
multiclass VOP3P_Real_MAI<bits<10> op> {
multiclass VOP3P_Real_MAI<bits<7> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicate = HasMAIInsts;
@ -434,32 +434,32 @@ multiclass VOP3P_Real_MAI<bits<10> op> {
}
}
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>;
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>;
defm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>;
defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
defm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>;
defm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>;
defm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
let SubtargetPredicate = HasMadMixInsts in {
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
let SubtargetPredicate = HasFmaMixInsts in {
@ -467,54 +467,54 @@ let DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
}
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
} // End SubtargetPredicate = HasDot1Insts
let SubtargetPredicate = HasMAIInsts in {
defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>;
defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>;
defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>;
defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>;
defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>;
defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>;
defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>;
defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>;
defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>;
defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x3cc>;
defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>;
defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>;
defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>;
defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>;
defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>;
defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>;
defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>;
defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>;
defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x40>;
defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x41>;
defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x42>;
defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x44>;
defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x45>;
defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x48>;
defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x49>;
defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x4a>;
defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x4c>;
defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x4d>;
defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x50>;
defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x51>;
defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x52>;
defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x54>;
defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x55>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x68>;
defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x69>;
defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x6b>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x6c>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x6d>;
} // End SubtargetPredicate = HasMAIInsts
@ -523,48 +523,48 @@ defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
multiclass VOP3P_Real_gfx10<bits<10> op> {
multiclass VOP3P_Real_gfx10<bits<7> op> {
def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>;
defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>;
defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>;
defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>;
defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>;
defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>;
defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>;
defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>;
defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>;
defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>;
defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>;
defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>;
defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>;
defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>;
defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>;
defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>;
defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>;
defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>;
defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>;
defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>;
defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>;
defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>;
defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>;
defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>;
defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>;
defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>;
defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>;
defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>;
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>;
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>;
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>;
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
} // End SubtargetPredicate = HasDot1Insts

View File

@ -296,7 +296,7 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<8> vdst;
// neg, neg_hi, op_sel put in srcN_modifiers
bits<4> src0_modifiers;
@ -320,8 +320,8 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
let Inst{25-16} = op;
let Inst{31-26} = 0x34; //encoding
let Inst{22-16} = op;
let Inst{31-23} = 0x1a7; //encoding
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
@ -332,7 +332,7 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
class VOP3Pe_MAI <bits<7> op, VOPProfile P> : Enc64 {
bits<8> vdst;
bits<10> src0;
bits<10> src1;
@ -349,8 +349,8 @@ class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
let Inst{25-16} = op;
let Inst{31-26} = 0x34; //encoding
let Inst{22-16} = op;
let Inst{31-23} = 0x1a7; //encoding
let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
@ -362,8 +362,8 @@ class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
}
class VOP3Pe_gfx10 <bits<10> op, VOPProfile P> : VOP3Pe<op, P> {
let Inst{31-26} = 0x33; //encoding
class VOP3Pe_gfx10 <bits<7> op, VOPProfile P> : VOP3Pe<op, P> {
let Inst{31-23} = 0x198; //encoding
}
class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {