[AMDGPU] gfx1010 VOP3 and VOP3P implementation

Differential Revision: https://reviews.llvm.org/D61202

llvm-svn: 359328
This commit is contained in:
Stanislav Mekhanoshin 2019-04-26 17:56:03 +00:00
parent 18bc872405
commit 61beff020e
6 changed files with 392 additions and 108 deletions

View File

@ -2691,32 +2691,41 @@ static bool IsRevOpcode(const unsigned Opcode)
case AMDGPU::V_LSHLREV_B16_e64:
case AMDGPU::V_LSHLREV_B16_e32_vi:
case AMDGPU::V_LSHLREV_B16_e64_vi:
case AMDGPU::V_LSHLREV_B16_gfx10:
case AMDGPU::V_LSHRREV_B16_e32:
case AMDGPU::V_LSHRREV_B16_e64:
case AMDGPU::V_LSHRREV_B16_e32_vi:
case AMDGPU::V_LSHRREV_B16_e64_vi:
case AMDGPU::V_LSHRREV_B16_gfx10:
case AMDGPU::V_ASHRREV_I16_e32:
case AMDGPU::V_ASHRREV_I16_e64:
case AMDGPU::V_ASHRREV_I16_e32_vi:
case AMDGPU::V_ASHRREV_I16_e64_vi:
case AMDGPU::V_ASHRREV_I16_gfx10:
case AMDGPU::V_LSHLREV_B64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_vi:
case AMDGPU::V_LSHRREV_B64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_vi:
case AMDGPU::V_ASHRREV_I64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHRREV_I64_vi:
case AMDGPU::V_PK_LSHLREV_B16:
case AMDGPU::V_PK_LSHLREV_B16_gfx10:
case AMDGPU::V_PK_LSHLREV_B16_vi:
case AMDGPU::V_PK_LSHRREV_B16:
case AMDGPU::V_PK_LSHRREV_B16_gfx10:
case AMDGPU::V_PK_LSHRREV_B16_vi:
case AMDGPU::V_PK_ASHRREV_I16:
case AMDGPU::V_PK_ASHRREV_I16_gfx10:
case AMDGPU::V_PK_ASHRREV_I16_vi:
return true;
default:

View File

@ -2820,9 +2820,11 @@ static bool shouldReadExec(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
case AMDGPU::V_READLANE_B32_gfx6_gfx7:
case AMDGPU::V_READLANE_B32_gfx10:
case AMDGPU::V_READLANE_B32_vi:
case AMDGPU::V_WRITELANE_B32:
case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
case AMDGPU::V_WRITELANE_B32_gfx10:
case AMDGPU::V_WRITELANE_B32_vi:
return false;
}

View File

@ -143,15 +143,6 @@ def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
}
}
class getVOP3VCC<VOPProfile P, SDPatternOperator node> {
list<dag> ret =
[(set P.DstVT:$vdst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
(i1 VCC)))];
}
class VOP3Features<bit Clamp, bit OpSel, bit Packed> {
bit HasClamp = Clamp;
bit HasOpSel = OpSel;
@ -315,8 +306,7 @@ let Uses = [VCC, EXEC] in {
// if (vcc)
// result *= 2^32
//
def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []> {
let SchedRW = [WriteFloatFMA];
}
// v_div_fmas_f64:
@ -324,8 +314,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
// if (vcc)
// result *= 2^64
//
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> {
let SchedRW = [WriteDouble];
let FPDPRounding = 1;
}
@ -386,12 +375,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
}
let SchedRW = [Write64Bit] in {
let SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7] in {
let SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] in {
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
} // End SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7]
} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
let SubtargetPredicate = isGFX8Plus in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
@ -519,7 +508,7 @@ def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
} // End SubtargetPredicate = isGFX8GFX9
let Predicates = [Has16BitInsts] in {
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
Instruction inst, SDPatternOperator op3> {
@ -533,7 +522,23 @@ def : GCNPat <
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [Has16BitInsts]
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
// Selection patterns for 16-bit multiply-add, active only when both
// Has16BitInsts and isGFX10Plus hold. They target the *_gfx9 flavours of
// V_MAD_U16 / V_MAD_I16.
let Predicates = [Has16BitInsts, isGFX10Plus] in {
// Matches op2(op1(src0, src1), src2) on i16 operands and emits `inst` with
// SRCMODS.NONE on every source and DSTCLAMP.NONE.
// NOTE(review): op3 is accepted (same parameter list as Ternary_i16_Pats) but
// is not referenced by the pattern below.
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
Instruction inst, SDPatternOperator op3> {
def : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
}
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9, zext>;
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9, sext>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
(ops node:$x, node:$y, node:$z),
@ -618,6 +623,10 @@ def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
} // End SubtargetPredicate = isGFX9Plus
// v_xor3_b32 pseudo, gated on isGFX10Plus. No selection pattern is attached
// here (VOP3Inst is given only the mnemonic and the profile).
// NOTE(review): profile name suggests one i32 result and three i32 sources --
// confirm against the VOP_I32_I32_I32_I32 definition.
let SubtargetPredicate = isGFX10Plus in {
def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
//===----------------------------------------------------------------------===//
@ -664,100 +673,225 @@ def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// GFX6, GFX7.
// GFX10.
//===----------------------------------------------------------------------===//
let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in {
multiclass VOP3_Real_si<bits<9> op> {
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_si<bits<9> op> {
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3be_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7"
defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>;
defm V_MAD_F32 : VOP3_Real_si <0x141>;
defm V_MAD_I32_I24 : VOP3_Real_si <0x142>;
defm V_MAD_U32_U24 : VOP3_Real_si <0x143>;
defm V_CUBEID_F32 : VOP3_Real_si <0x144>;
defm V_CUBESC_F32 : VOP3_Real_si <0x145>;
defm V_CUBETC_F32 : VOP3_Real_si <0x146>;
defm V_CUBEMA_F32 : VOP3_Real_si <0x147>;
defm V_BFE_U32 : VOP3_Real_si <0x148>;
defm V_BFE_I32 : VOP3_Real_si <0x149>;
defm V_BFI_B32 : VOP3_Real_si <0x14a>;
defm V_FMA_F32 : VOP3_Real_si <0x14b>;
defm V_FMA_F64 : VOP3_Real_si <0x14c>;
defm V_LERP_U8 : VOP3_Real_si <0x14d>;
defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>;
defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>;
defm V_MULLIT_F32 : VOP3_Real_si <0x150>;
defm V_MIN3_F32 : VOP3_Real_si <0x151>;
defm V_MIN3_I32 : VOP3_Real_si <0x152>;
defm V_MIN3_U32 : VOP3_Real_si <0x153>;
defm V_MAX3_F32 : VOP3_Real_si <0x154>;
defm V_MAX3_I32 : VOP3_Real_si <0x155>;
defm V_MAX3_U32 : VOP3_Real_si <0x156>;
defm V_MED3_F32 : VOP3_Real_si <0x157>;
defm V_MED3_I32 : VOP3_Real_si <0x158>;
defm V_MED3_U32 : VOP3_Real_si <0x159>;
defm V_SAD_U8 : VOP3_Real_si <0x15a>;
defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>;
defm V_SAD_U16 : VOP3_Real_si <0x15c>;
defm V_SAD_U32 : VOP3_Real_si <0x15d>;
defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
defm V_LSHL_B64 : VOP3_Real_si <0x161>;
defm V_LSHR_B64 : VOP3_Real_si <0x162>;
defm V_ASHR_I64 : VOP3_Real_si <0x163>;
defm V_ADD_F64 : VOP3_Real_si <0x164>;
defm V_MUL_F64 : VOP3_Real_si <0x165>;
defm V_MIN_F64 : VOP3_Real_si <0x166>;
defm V_MAX_F64 : VOP3_Real_si <0x167>;
defm V_LDEXP_F64 : VOP3_Real_si <0x168>;
defm V_MUL_LO_U32 : VOP3_Real_si <0x169>;
defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>;
defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>;
defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>;
defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>;
defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>;
defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>;
defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>;
defm V_MSAD_U8 : VOP3_Real_si <0x171>;
defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>;
defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
//===----------------------------------------------------------------------===//
// GFX7.
//===----------------------------------------------------------------------===//
multiclass VOP3_Real_ci<bits<9> op> {
def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [isGFX7Only];
let DecoderNamespace = "GFX7";
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
multiclass VOP3_Real_gfx10<bits<10> op> {
def _gfx10 :
VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
}
multiclass VOP3be_Real_ci<bits<9> op> {
def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3be_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [isGFX7Only];
let DecoderNamespace = "GFX7";
multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
string asmName> {
def _gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
let AsmString = asmName # ps.AsmOperands;
}
}
}
multiclass VOP3be_Real_gfx10<bits<10> op> {
def _gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3Interp_Real_gfx10<bits<10> op> {
def _gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
def _gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
string asmName> {
def _gfx10 :
VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
let AsmString = asmName # ps.AsmOperands;
}
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>;
defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>;
defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
// GFX10-only real encodings built with VOP3_Real_gfx10. The template argument
// is the opcode value passed to VOP3e_gfx10; each defm yields a <NAME>_gfx10
// record.
defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>;
// The GFX10 real for V_WRITELANE_B32 is defined with an overridden
// InOperandList (src0, src1 and a $vdst_in operand).
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
defm V_LSHRREV_B64 : VOP3_Real_gfx10<0x300>;
defm V_ASHRREV_I64 : VOP3_Real_gfx10<0x301>;
defm V_PERM_B32 : VOP3_Real_gfx10<0x344>;
defm V_XAD_U32 : VOP3_Real_gfx10<0x345>;
defm V_LSHL_ADD_U32 : VOP3_Real_gfx10<0x346>;
defm V_ADD_LSHL_U32 : VOP3_Real_gfx10<0x347>;
defm V_ADD3_U32 : VOP3_Real_gfx10<0x36d>;
defm V_LSHL_OR_B32 : VOP3_Real_gfx10<0x36f>;
defm V_AND_OR_B32 : VOP3_Real_gfx10<0x371>;
defm V_OR3_B32 : VOP3_Real_gfx10<0x372>;
// GFX10 reals that reuse an existing pseudo under a new mnemonic: the first
// string names the pseudo record to cast, the second is the assembly name
// that replaces the pseudo's mnemonic in AsmString.
// TODO-GFX10: add MC tests for v_add/sub_nc_i16
defm V_ADD_NC_I16 :
VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
defm V_SUB_NC_I16 :
VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_SUB_NC_I32 :
VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">;
defm V_ADD_NC_I32 :
VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
// GFX10 reals for the f16 interpolation pseudos (VOP3Interp_gfx10 encoding).
defm V_INTERP_P1LL_F16 : VOP3Interp_Real_gfx10<0x342>;
defm V_INTERP_P1LV_F16 : VOP3Interp_Real_gfx10<0x343>;
defm V_INTERP_P2_F16 : VOP3Interp_Real_gfx10<0x35a>;
// GFX10 reals that use the VOP3OpSel_gfx10 encoding; the pseudo is looked up
// by the defm name itself.
defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx10<0x311>;
defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;
defm V_MIN3_F16 : VOP3OpSel_Real_gfx10<0x351>;
defm V_MIN3_I16 : VOP3OpSel_Real_gfx10<0x352>;
defm V_MIN3_U16 : VOP3OpSel_Real_gfx10<0x353>;
defm V_MAX3_F16 : VOP3OpSel_Real_gfx10<0x354>;
defm V_MAX3_I16 : VOP3OpSel_Real_gfx10<0x355>;
defm V_MAX3_U16 : VOP3OpSel_Real_gfx10<0x356>;
defm V_MED3_F16 : VOP3OpSel_Real_gfx10<0x357>;
defm V_MED3_I16 : VOP3OpSel_Real_gfx10<0x358>;
defm V_MED3_U16 : VOP3OpSel_Real_gfx10<0x359>;
defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx10<0x373>;
defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx10<0x375>;
// GFX10 reals built from the *_gfx9 pseudos but printed with the plain
// mnemonic (no suffix) given as the last argument.
defm V_MAD_U16 :
VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
defm V_FMA_F16 :
VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
defm V_MAD_I16 :
VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
defm V_DIV_FIXUP_F16 :
VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
// GFX10 reals built from the *_e64 pseudos of the 16-bit operations below.
// They currently use the plain VOP3 encoding (VOP3_Real_gfx10_with_name), not
// the opsel one -- see the FIXME.
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
// (they do not support SDWA or DPP).
defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16_e64", "v_add_nc_u16">;
defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16_e64", "v_sub_nc_u16">;
defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_e64", "v_mul_lo_u16">;
defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_e64", "v_lshrrev_b16">;
defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_e64", "v_ashrrev_i16">;
defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16_e64", "v_max_u16">;
defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_max_i16">;
defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">;
defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">;
defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">;
//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//
// Real-encoding multiclasses for instructions that exist on GFX7 only
// (AssemblerPredicate = isGFX7Only, decoder namespace "GFX7").
// Both take a 10-bit opcode, matching the GFX10 multiclasses, but forward only
// the low nine bits (op{8-0}) to the gfx6/gfx7 encoding classes.
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
// Emits <NAME>_gfx7 using the VOP3e_gfx6_gfx7 encoding.
multiclass VOP3_Real_gfx7<bits<10> op> {
def _gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
// Emits <NAME>_gfx7 using the VOP3be_gfx6_gfx7 encoding.
multiclass VOP3be_Real_gfx7<bits<10> op> {
def _gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
// Combined multiclasses: one defm produces both the _gfx7 and the _gfx10 real
// from the same opcode value.
multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
VOP3_Real_gfx7<op>, VOP3_Real_gfx10<op>;
multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
VOP3be_Real_gfx7<op>, VOP3be_Real_gfx10<op>;
// Instructions that get both a GFX7 and a GFX10 real with the same opcode.
defm V_QSAD_PK_U16_U8 : VOP3_Real_gfx7_gfx10<0x172>;
defm V_MQSAD_U32_U8 : VOP3_Real_gfx7_gfx10<0x175>;
defm V_MAD_U64_U32 : VOP3be_Real_gfx7_gfx10<0x176>;
defm V_MAD_I64_I32 : VOP3be_Real_gfx7_gfx10<0x177>;
//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
// Real-encoding multiclasses shared by GFX6 and GFX7
// (AssemblerPredicate = isGFX6GFX7, decoder namespace "GFX6GFX7").
// As with the GFX7-only variants, the opcode parameter is 10 bits wide but
// only op{8-0} reaches the encoding class.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
// Emits <NAME>_gfx6_gfx7 using the VOP3e_gfx6_gfx7 encoding.
multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
def _gfx6_gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
// Emits <NAME>_gfx6_gfx7 using the VOP3be_gfx6_gfx7 encoding.
multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
def _gfx6_gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
// Combined multiclasses: one defm produces both the _gfx6_gfx7 and the _gfx10
// real from the same opcode value.
multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
VOP3_Real_gfx6_gfx7<op>, VOP3_Real_gfx10<op>;
multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
VOP3be_Real_gfx6_gfx7<op>, VOP3be_Real_gfx10<op>;
// The non-reversed 64-bit shifts get GFX6/GFX7 reals only; no _gfx10 record is
// created for them here (the GFX10 section defines reals for the *REV_B64 /
// *REV_I64 forms instead).
// NOTE(review): if the pseudos are selectable on GFX10, confirm they are
// never emitted there, since no GFX10 encoding exists for them.
defm V_LSHL_B64 : VOP3_Real_gfx6_gfx7<0x161>;
defm V_LSHR_B64 : VOP3_Real_gfx6_gfx7<0x162>;
defm V_ASHR_I64 : VOP3_Real_gfx6_gfx7<0x163>;
// Instructions that share one opcode value across GFX6/GFX7 and GFX10. Each
// defm creates a _gfx6_gfx7 real and a _gfx10 real.
defm V_MAD_LEGACY_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
defm V_MAD_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
defm V_MAD_I32_I24 : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
defm V_MAD_U32_U24 : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
defm V_CUBEID_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
defm V_CUBESC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
defm V_CUBETC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
defm V_CUBEMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
defm V_BFE_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
defm V_BFE_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
defm V_BFI_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
defm V_FMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
defm V_FMA_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
defm V_LERP_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
defm V_MULLIT_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
defm V_MIN3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
defm V_MIN3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
defm V_MIN3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
defm V_MAX3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
defm V_MAX3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
defm V_MAX3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
defm V_MED3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
defm V_MED3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
defm V_MED3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
defm V_SAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
defm V_SAD_HI_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
defm V_SAD_U16 : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
defm V_SAD_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
defm V_CVT_PK_U8_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
defm V_DIV_FIXUP_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
defm V_DIV_FIXUP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
// 0x161-0x163 are not in this list; V_LSHL_B64 / V_LSHR_B64 / V_ASHR_I64 use
// those values with GFX6/GFX7-only reals.
defm V_ADD_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
defm V_MUL_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
defm V_MIN_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
defm V_MAX_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
defm V_LDEXP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
defm V_MUL_LO_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
defm V_MUL_HI_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
defm V_MUL_LO_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16b>;
defm V_MUL_HI_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
defm V_DIV_FMAS_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
defm V_DIV_FMAS_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
defm V_MSAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
defm V_TRIG_PREOP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
// The div_scale instructions use the VOP3be ("be" encoding class) variants.
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).

View File

@ -378,3 +378,37 @@ defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
} // End SubtargetPredicate = HasDot1Insts
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
// Real-encoding multiclass for VOP3P instructions on GFX10
// (AssemblerPredicate = isGFX10Plus, decoder namespace "GFX10").
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
// Emits <NAME>_gfx10 from the VOP3P pseudo named by the defm, using the
// VOP3Pe_gfx10 encoding with the given opcode value and the pseudo's profile.
multiclass VOP3P_Real_gfx10<bits<10> op> {
def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
// GFX10 reals for the packed (V_PK_*) instructions, opcodes 0x000-0x012.
defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>;
defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>;
defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>;
defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>;
defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>;
defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>;
defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>;
defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>;
defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>;
defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>;
defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>;
defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>;
defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>;
defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
// GFX10 reals for the V_FMA_MIX* instructions, opcodes 0x020-0x022.
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;

View File

@ -0,0 +1,96 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; i16 shift-left returned from a function. Both targets must emit
; v_lshlrev_b16 (e32 or e64 form). On gfx900 the return (s_setpc_b64) must
; follow immediately; on gfx1010 a v_and_b32 with 0xffff applied to the
; shift result is expected.
; GCN-LABEL: {{^}}shl_i16:
; GCN: v_lshlrev_b16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @shl_i16(i16 %x, i16 %y) {
%res = shl i16 %x, %y
ret i16 %res
}
; i16 logical shift-right returned from a function. Both targets must emit
; v_lshrrev_b16 (e32 or e64 form). On gfx900 the return must follow
; immediately; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}lshr_i16:
; GCN: v_lshrrev_b16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @lshr_i16(i16 %x, i16 %y) {
%res = lshr i16 %x, %y
ret i16 %res
}
; i16 arithmetic shift-right returned from a function. Both targets must emit
; v_ashrrev_i16 (e32 or e64 form). On gfx900 the return must follow
; immediately; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}ashr_i16:
; GCN: v_ashrrev_i16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @ashr_i16(i16 %x, i16 %y) {
%res = ashr i16 %x, %y
ret i16 %res
}
; i16 add returned from a function. The mnemonic pattern accepts both
; v_add_u16 and v_add_nc_u16 (the optional "nc_" part), in e32 or e64 form.
; On gfx900 the return must follow immediately; on gfx1010 the result is
; expected to be masked with 0xffff.
; GCN-LABEL: {{^}}add_u16:
; GCN: v_add_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @add_u16(i16 %x, i16 %y) {
%res = add i16 %x, %y
ret i16 %res
}
; i16 sub returned from a function. The mnemonic pattern accepts both
; v_sub_u16 and v_sub_nc_u16 (the optional "nc_" part), in e32 or e64 form.
; On gfx900 the return must follow immediately; on gfx1010 the result is
; expected to be masked with 0xffff.
; GCN-LABEL: {{^}}sub_u16:
; GCN: v_sub_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @sub_u16(i16 %x, i16 %y) {
%res = sub i16 %x, %y
ret i16 %res
}
; i16 multiply returned from a function. Both targets must emit v_mul_lo_u16
; (e32 or e64 form). On gfx900 the return must follow immediately; on gfx1010
; the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}mul_lo_u16:
; GCN: v_mul_lo_u16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @mul_lo_u16(i16 %x, i16 %y) {
%res = mul i16 %x, %y
ret i16 %res
}
; Unsigned i16 minimum written as icmp ule + select. Both targets must emit
; v_min_u16 (e32 or e64 form). On gfx900 the return must follow immediately;
; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}min_u16:
; GCN: v_min_u16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @min_u16(i16 %x, i16 %y) {
%cmp = icmp ule i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
}
; Signed i16 minimum written as icmp sle + select. Both targets must emit
; v_min_i16 (e32 or e64 form). On gfx900 the return must follow immediately;
; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}min_i16:
; GCN: v_min_i16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @min_i16(i16 %x, i16 %y) {
%cmp = icmp sle i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
}
; Unsigned i16 maximum written as icmp uge + select. Both targets must emit
; v_max_u16 (e32 or e64 form). On gfx900 the return must follow immediately;
; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}max_u16:
; GCN: v_max_u16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @max_u16(i16 %x, i16 %y) {
%cmp = icmp uge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
}
; Signed i16 maximum written as icmp sge + select. Both targets must emit
; v_max_i16 (e32 or e64 form). On gfx900 the return must follow immediately;
; on gfx1010 the result is expected to be masked with 0xffff.
; GCN-LABEL: {{^}}max_i16:
; GCN: v_max_i16_e{{32|64}} [[OP:v[0-9]+]],
; GFX9-NEXT: s_setpc_b64
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
define i16 @max_i16(i16 %x, i16 %y) {
%cmp = icmp sge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
}

View File

@ -1,7 +1,8 @@
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI %s
// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s
s_mov_b32 [ttmp5], [ttmp3]
// SICI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x03,0xf5,0xbe]
@ -110,3 +111,11 @@ flat_load_dwordx4 v[8/2+4:11/2+6], v[2:3]
flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3]
// VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08]
// `null` register operand in a VALU instruction: the run using the NOSICIVI
// prefix expects an error diagnostic, the gfx1010 run expects it to assemble
// to the encoding shown.
v_mul_f32 v0, null, v2
// NOSICIVI: error:
// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10]
// `null` as both destination and sources of a scalar instruction, with the
// same expectations as above.
s_add_u32 null, null, null
// NOSICIVI: error:
// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80]