forked from OSchip/llvm-project
[AMDGPU] gfx1010 VOP3 and VOP3P implementation
Differential Revision: https://reviews.llvm.org/D61202 llvm-svn: 359328
This commit is contained in:
parent
18bc872405
commit
61beff020e
|
@ -2691,32 +2691,41 @@ static bool IsRevOpcode(const unsigned Opcode)
|
|||
case AMDGPU::V_LSHLREV_B16_e64:
|
||||
case AMDGPU::V_LSHLREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHLREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHLREV_B16_gfx10:
|
||||
|
||||
case AMDGPU::V_LSHRREV_B16_e32:
|
||||
case AMDGPU::V_LSHRREV_B16_e64:
|
||||
case AMDGPU::V_LSHRREV_B16_e32_vi:
|
||||
case AMDGPU::V_LSHRREV_B16_e64_vi:
|
||||
case AMDGPU::V_LSHRREV_B16_gfx10:
|
||||
|
||||
case AMDGPU::V_ASHRREV_I16_e32:
|
||||
case AMDGPU::V_ASHRREV_I16_e64:
|
||||
case AMDGPU::V_ASHRREV_I16_e32_vi:
|
||||
case AMDGPU::V_ASHRREV_I16_e64_vi:
|
||||
case AMDGPU::V_ASHRREV_I16_gfx10:
|
||||
|
||||
case AMDGPU::V_LSHLREV_B64:
|
||||
case AMDGPU::V_LSHLREV_B64_gfx10:
|
||||
case AMDGPU::V_LSHLREV_B64_vi:
|
||||
|
||||
case AMDGPU::V_LSHRREV_B64:
|
||||
case AMDGPU::V_LSHRREV_B64_gfx10:
|
||||
case AMDGPU::V_LSHRREV_B64_vi:
|
||||
|
||||
case AMDGPU::V_ASHRREV_I64:
|
||||
case AMDGPU::V_ASHRREV_I64_gfx10:
|
||||
case AMDGPU::V_ASHRREV_I64_vi:
|
||||
|
||||
case AMDGPU::V_PK_LSHLREV_B16:
|
||||
case AMDGPU::V_PK_LSHLREV_B16_gfx10:
|
||||
case AMDGPU::V_PK_LSHLREV_B16_vi:
|
||||
|
||||
case AMDGPU::V_PK_LSHRREV_B16:
|
||||
case AMDGPU::V_PK_LSHRREV_B16_gfx10:
|
||||
case AMDGPU::V_PK_LSHRREV_B16_vi:
|
||||
case AMDGPU::V_PK_ASHRREV_I16:
|
||||
case AMDGPU::V_PK_ASHRREV_I16_gfx10:
|
||||
case AMDGPU::V_PK_ASHRREV_I16_vi:
|
||||
return true;
|
||||
default:
|
||||
|
|
|
@ -2820,9 +2820,11 @@ static bool shouldReadExec(const MachineInstr &MI) {
|
|||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::V_READLANE_B32:
|
||||
case AMDGPU::V_READLANE_B32_gfx6_gfx7:
|
||||
case AMDGPU::V_READLANE_B32_gfx10:
|
||||
case AMDGPU::V_READLANE_B32_vi:
|
||||
case AMDGPU::V_WRITELANE_B32:
|
||||
case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
|
||||
case AMDGPU::V_WRITELANE_B32_gfx10:
|
||||
case AMDGPU::V_WRITELANE_B32_vi:
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -143,15 +143,6 @@ def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
|
|||
}
|
||||
}
|
||||
|
||||
class getVOP3VCC<VOPProfile P, SDPatternOperator node> {
|
||||
list<dag> ret =
|
||||
[(set P.DstVT:$vdst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
|
||||
(i1 VCC)))];
|
||||
}
|
||||
|
||||
class VOP3Features<bit Clamp, bit OpSel, bit Packed> {
|
||||
bit HasClamp = Clamp;
|
||||
bit HasOpSel = OpSel;
|
||||
|
@ -315,8 +306,7 @@ let Uses = [VCC, EXEC] in {
|
|||
// if (vcc)
|
||||
// result *= 2^32
|
||||
//
|
||||
def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
|
||||
getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
|
||||
def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []> {
|
||||
let SchedRW = [WriteFloatFMA];
|
||||
}
|
||||
// v_div_fmas_f64:
|
||||
|
@ -324,8 +314,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
|
|||
// if (vcc)
|
||||
// result *= 2^64
|
||||
//
|
||||
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
|
||||
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
|
||||
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> {
|
||||
let SchedRW = [WriteDouble];
|
||||
let FPDPRounding = 1;
|
||||
}
|
||||
|
@ -386,12 +375,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
|
|||
}
|
||||
|
||||
let SchedRW = [Write64Bit] in {
|
||||
let SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7] in {
|
||||
let SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] in {
|
||||
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
|
||||
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
|
||||
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
|
||||
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
|
||||
} // End SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7]
|
||||
} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
|
||||
|
||||
let SubtargetPredicate = isGFX8Plus in {
|
||||
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
|
||||
|
@ -519,7 +508,7 @@ def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
|
|||
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
|
||||
} // End SubtargetPredicate = isGFX8GFX9
|
||||
|
||||
let Predicates = [Has16BitInsts] in {
|
||||
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
|
||||
|
||||
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
|
||||
Instruction inst, SDPatternOperator op3> {
|
||||
|
@ -533,7 +522,23 @@ def : GCNPat <
|
|||
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
|
||||
defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
|
||||
|
||||
} // End Predicates = [Has16BitInsts]
|
||||
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
|
||||
|
||||
let Predicates = [Has16BitInsts, isGFX10Plus] in {
|
||||
|
||||
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
|
||||
Instruction inst, SDPatternOperator op3> {
|
||||
def : GCNPat <
|
||||
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
|
||||
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
|
||||
>;
|
||||
|
||||
}
|
||||
|
||||
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9, zext>;
|
||||
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9, sext>;
|
||||
|
||||
} // End Predicates = [Has16BitInsts, isGFX10Plus]
|
||||
|
||||
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
|
||||
(ops node:$x, node:$y, node:$z),
|
||||
|
@ -618,6 +623,10 @@ def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
|
|||
|
||||
} // End SubtargetPredicate = isGFX9Plus
|
||||
|
||||
let SubtargetPredicate = isGFX10Plus in {
|
||||
def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
} // End SubtargetPredicate = isGFX10Plus
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Integer Clamp Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -664,100 +673,225 @@ def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX6, GFX7.
|
||||
// GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in {
|
||||
|
||||
multiclass VOP3_Real_si<bits<9> op> {
|
||||
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3e_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
multiclass VOP3be_Real_si<bits<9> op> {
|
||||
def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3be_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
|
||||
} // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7"
|
||||
|
||||
defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>;
|
||||
defm V_MAD_F32 : VOP3_Real_si <0x141>;
|
||||
defm V_MAD_I32_I24 : VOP3_Real_si <0x142>;
|
||||
defm V_MAD_U32_U24 : VOP3_Real_si <0x143>;
|
||||
defm V_CUBEID_F32 : VOP3_Real_si <0x144>;
|
||||
defm V_CUBESC_F32 : VOP3_Real_si <0x145>;
|
||||
defm V_CUBETC_F32 : VOP3_Real_si <0x146>;
|
||||
defm V_CUBEMA_F32 : VOP3_Real_si <0x147>;
|
||||
defm V_BFE_U32 : VOP3_Real_si <0x148>;
|
||||
defm V_BFE_I32 : VOP3_Real_si <0x149>;
|
||||
defm V_BFI_B32 : VOP3_Real_si <0x14a>;
|
||||
defm V_FMA_F32 : VOP3_Real_si <0x14b>;
|
||||
defm V_FMA_F64 : VOP3_Real_si <0x14c>;
|
||||
defm V_LERP_U8 : VOP3_Real_si <0x14d>;
|
||||
defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>;
|
||||
defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>;
|
||||
defm V_MULLIT_F32 : VOP3_Real_si <0x150>;
|
||||
defm V_MIN3_F32 : VOP3_Real_si <0x151>;
|
||||
defm V_MIN3_I32 : VOP3_Real_si <0x152>;
|
||||
defm V_MIN3_U32 : VOP3_Real_si <0x153>;
|
||||
defm V_MAX3_F32 : VOP3_Real_si <0x154>;
|
||||
defm V_MAX3_I32 : VOP3_Real_si <0x155>;
|
||||
defm V_MAX3_U32 : VOP3_Real_si <0x156>;
|
||||
defm V_MED3_F32 : VOP3_Real_si <0x157>;
|
||||
defm V_MED3_I32 : VOP3_Real_si <0x158>;
|
||||
defm V_MED3_U32 : VOP3_Real_si <0x159>;
|
||||
defm V_SAD_U8 : VOP3_Real_si <0x15a>;
|
||||
defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>;
|
||||
defm V_SAD_U16 : VOP3_Real_si <0x15c>;
|
||||
defm V_SAD_U32 : VOP3_Real_si <0x15d>;
|
||||
defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
|
||||
defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
|
||||
defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
|
||||
defm V_LSHL_B64 : VOP3_Real_si <0x161>;
|
||||
defm V_LSHR_B64 : VOP3_Real_si <0x162>;
|
||||
defm V_ASHR_I64 : VOP3_Real_si <0x163>;
|
||||
defm V_ADD_F64 : VOP3_Real_si <0x164>;
|
||||
defm V_MUL_F64 : VOP3_Real_si <0x165>;
|
||||
defm V_MIN_F64 : VOP3_Real_si <0x166>;
|
||||
defm V_MAX_F64 : VOP3_Real_si <0x167>;
|
||||
defm V_LDEXP_F64 : VOP3_Real_si <0x168>;
|
||||
defm V_MUL_LO_U32 : VOP3_Real_si <0x169>;
|
||||
defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>;
|
||||
defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>;
|
||||
defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>;
|
||||
defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>;
|
||||
defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>;
|
||||
defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>;
|
||||
defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>;
|
||||
defm V_MSAD_U8 : VOP3_Real_si <0x171>;
|
||||
defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>;
|
||||
defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX7.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass VOP3_Real_ci<bits<9> op> {
|
||||
def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3e_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
|
||||
let AssemblerPredicates = [isGFX7Only];
|
||||
let DecoderNamespace = "GFX7";
|
||||
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
|
||||
multiclass VOP3_Real_gfx10<bits<10> op> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
|
||||
VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass VOP3be_Real_ci<bits<9> op> {
|
||||
def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3be_gfx6_gfx7 <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
|
||||
let AssemblerPredicates = [isGFX7Only];
|
||||
let DecoderNamespace = "GFX7";
|
||||
multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
|
||||
string asmName> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
|
||||
VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
|
||||
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
|
||||
let AsmString = asmName # ps.AsmOperands;
|
||||
}
|
||||
}
|
||||
}
|
||||
multiclass VOP3be_Real_gfx10<bits<10> op> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
|
||||
VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
multiclass VOP3Interp_Real_gfx10<bits<10> op> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
|
||||
VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
|
||||
VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
|
||||
string asmName> {
|
||||
def _gfx10 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
|
||||
VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
|
||||
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
|
||||
let AsmString = asmName # ps.AsmOperands;
|
||||
}
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
|
||||
|
||||
defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
|
||||
defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>;
|
||||
defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>;
|
||||
defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
|
||||
defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>;
|
||||
|
||||
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
|
||||
defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
|
||||
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
|
||||
|
||||
defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
|
||||
defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
|
||||
defm V_LSHRREV_B64 : VOP3_Real_gfx10<0x300>;
|
||||
defm V_ASHRREV_I64 : VOP3_Real_gfx10<0x301>;
|
||||
defm V_PERM_B32 : VOP3_Real_gfx10<0x344>;
|
||||
defm V_XAD_U32 : VOP3_Real_gfx10<0x345>;
|
||||
defm V_LSHL_ADD_U32 : VOP3_Real_gfx10<0x346>;
|
||||
defm V_ADD_LSHL_U32 : VOP3_Real_gfx10<0x347>;
|
||||
defm V_ADD3_U32 : VOP3_Real_gfx10<0x36d>;
|
||||
defm V_LSHL_OR_B32 : VOP3_Real_gfx10<0x36f>;
|
||||
defm V_AND_OR_B32 : VOP3_Real_gfx10<0x371>;
|
||||
defm V_OR3_B32 : VOP3_Real_gfx10<0x372>;
|
||||
|
||||
// TODO-GFX10: add MC tests for v_add/sub_nc_i16
|
||||
defm V_ADD_NC_I16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
|
||||
defm V_SUB_NC_I16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
|
||||
defm V_SUB_NC_I32 :
|
||||
VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">;
|
||||
defm V_ADD_NC_I32 :
|
||||
VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
|
||||
|
||||
defm V_INTERP_P1LL_F16 : VOP3Interp_Real_gfx10<0x342>;
|
||||
defm V_INTERP_P1LV_F16 : VOP3Interp_Real_gfx10<0x343>;
|
||||
defm V_INTERP_P2_F16 : VOP3Interp_Real_gfx10<0x35a>;
|
||||
|
||||
defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx10<0x311>;
|
||||
defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
|
||||
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;
|
||||
|
||||
defm V_MIN3_F16 : VOP3OpSel_Real_gfx10<0x351>;
|
||||
defm V_MIN3_I16 : VOP3OpSel_Real_gfx10<0x352>;
|
||||
defm V_MIN3_U16 : VOP3OpSel_Real_gfx10<0x353>;
|
||||
defm V_MAX3_F16 : VOP3OpSel_Real_gfx10<0x354>;
|
||||
defm V_MAX3_I16 : VOP3OpSel_Real_gfx10<0x355>;
|
||||
defm V_MAX3_U16 : VOP3OpSel_Real_gfx10<0x356>;
|
||||
defm V_MED3_F16 : VOP3OpSel_Real_gfx10<0x357>;
|
||||
defm V_MED3_I16 : VOP3OpSel_Real_gfx10<0x358>;
|
||||
defm V_MED3_U16 : VOP3OpSel_Real_gfx10<0x359>;
|
||||
defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx10<0x373>;
|
||||
defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx10<0x375>;
|
||||
|
||||
defm V_MAD_U16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
|
||||
defm V_FMA_F16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
|
||||
defm V_MAD_I16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
|
||||
defm V_DIV_FIXUP_F16 :
|
||||
VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
|
||||
|
||||
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
|
||||
// (they do not support SDWA or DPP).
|
||||
defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16_e64", "v_add_nc_u16">;
|
||||
defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16_e64", "v_sub_nc_u16">;
|
||||
defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_e64", "v_mul_lo_u16">;
|
||||
defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_e64", "v_lshrrev_b16">;
|
||||
defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_e64", "v_ashrrev_i16">;
|
||||
defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16_e64", "v_max_u16">;
|
||||
defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_max_i16">;
|
||||
defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">;
|
||||
defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">;
|
||||
defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX7, GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
|
||||
multiclass VOP3_Real_gfx7<bits<10> op> {
|
||||
def _gfx7 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
multiclass VOP3be_Real_gfx7<bits<10> op> {
|
||||
def _gfx7 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
|
||||
|
||||
multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
|
||||
VOP3_Real_gfx7<op>, VOP3_Real_gfx10<op>;
|
||||
|
||||
multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
|
||||
VOP3be_Real_gfx7<op>, VOP3be_Real_gfx10<op>;
|
||||
|
||||
defm V_QSAD_PK_U16_U8 : VOP3_Real_gfx7_gfx10<0x172>;
|
||||
defm V_MQSAD_U32_U8 : VOP3_Real_gfx7_gfx10<0x175>;
|
||||
defm V_MAD_U64_U32 : VOP3be_Real_gfx7_gfx10<0x176>;
|
||||
defm V_MAD_I64_I32 : VOP3be_Real_gfx7_gfx10<0x177>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX6, GFX7, GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
|
||||
multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
|
||||
def _gfx6_gfx7 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
|
||||
def _gfx6_gfx7 :
|
||||
VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
|
||||
VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
|
||||
|
||||
multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
|
||||
VOP3_Real_gfx6_gfx7<op>, VOP3_Real_gfx10<op>;
|
||||
|
||||
multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
|
||||
VOP3be_Real_gfx6_gfx7<op>, VOP3be_Real_gfx10<op>;
|
||||
|
||||
defm V_LSHL_B64 : VOP3_Real_gfx6_gfx7<0x161>;
|
||||
defm V_LSHR_B64 : VOP3_Real_gfx6_gfx7<0x162>;
|
||||
defm V_ASHR_I64 : VOP3_Real_gfx6_gfx7<0x163>;
|
||||
|
||||
defm V_MAD_LEGACY_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
|
||||
defm V_MAD_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
|
||||
defm V_MAD_I32_I24 : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
|
||||
defm V_MAD_U32_U24 : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
|
||||
defm V_CUBEID_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
|
||||
defm V_CUBESC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
|
||||
defm V_CUBETC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
|
||||
defm V_CUBEMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
|
||||
defm V_BFE_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
|
||||
defm V_BFE_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
|
||||
defm V_BFI_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
|
||||
defm V_FMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
|
||||
defm V_FMA_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
|
||||
defm V_LERP_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
|
||||
defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
|
||||
defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
|
||||
defm V_MULLIT_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
|
||||
defm V_MIN3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
|
||||
defm V_MIN3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
|
||||
defm V_MIN3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
|
||||
defm V_MAX3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
|
||||
defm V_MAX3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
|
||||
defm V_MAX3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
|
||||
defm V_MED3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
|
||||
defm V_MED3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
|
||||
defm V_MED3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
|
||||
defm V_SAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
|
||||
defm V_SAD_HI_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
|
||||
defm V_SAD_U16 : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
|
||||
defm V_SAD_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
|
||||
defm V_CVT_PK_U8_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
|
||||
defm V_DIV_FIXUP_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
|
||||
defm V_DIV_FIXUP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
|
||||
defm V_ADD_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
|
||||
defm V_MUL_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
|
||||
defm V_MIN_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
|
||||
defm V_MAX_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
|
||||
defm V_LDEXP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
|
||||
defm V_MUL_LO_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
|
||||
defm V_MUL_HI_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
|
||||
defm V_MUL_LO_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16b>;
|
||||
defm V_MUL_HI_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
|
||||
defm V_DIV_FMAS_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
|
||||
defm V_DIV_FMAS_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
|
||||
defm V_MSAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
|
||||
defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
|
||||
defm V_TRIG_PREOP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
|
||||
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
|
||||
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX8, GFX9 (VI).
|
||||
|
|
|
@ -378,3 +378,37 @@ defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
|
|||
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot1Insts
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
|
||||
multiclass VOP3P_Real_gfx10<bits<10> op> {
|
||||
def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
|
||||
VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
|
||||
|
||||
defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>;
|
||||
defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>;
|
||||
defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>;
|
||||
defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>;
|
||||
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
|
||||
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
|
||||
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
|
||||
defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>;
|
||||
defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>;
|
||||
defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>;
|
||||
defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>;
|
||||
defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>;
|
||||
defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>;
|
||||
defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>;
|
||||
defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>;
|
||||
defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>;
|
||||
defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>;
|
||||
defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>;
|
||||
defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
|
||||
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
|
||||
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
|
||||
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
|
||||
|
||||
; GCN-LABEL: {{^}}shl_i16:
|
||||
; GCN: v_lshlrev_b16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @shl_i16(i16 %x, i16 %y) {
|
||||
%res = shl i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i16:
|
||||
; GCN: v_lshrrev_b16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @lshr_i16(i16 %x, i16 %y) {
|
||||
%res = lshr i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}ashr_i16:
|
||||
; GCN: v_ashrrev_i16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @ashr_i16(i16 %x, i16 %y) {
|
||||
%res = ashr i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_u16:
|
||||
; GCN: v_add_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @add_u16(i16 %x, i16 %y) {
|
||||
%res = add i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sub_u16:
|
||||
; GCN: v_sub_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @sub_u16(i16 %x, i16 %y) {
|
||||
%res = sub i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mul_lo_u16:
|
||||
; GCN: v_mul_lo_u16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @mul_lo_u16(i16 %x, i16 %y) {
|
||||
%res = mul i16 %x, %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}min_u16:
|
||||
; GCN: v_min_u16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @min_u16(i16 %x, i16 %y) {
|
||||
%cmp = icmp ule i16 %x, %y
|
||||
%res = select i1 %cmp, i16 %x, i16 %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}min_i16:
|
||||
; GCN: v_min_i16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @min_i16(i16 %x, i16 %y) {
|
||||
%cmp = icmp sle i16 %x, %y
|
||||
%res = select i1 %cmp, i16 %x, i16 %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}max_u16:
|
||||
; GCN: v_max_u16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @max_u16(i16 %x, i16 %y) {
|
||||
%cmp = icmp uge i16 %x, %y
|
||||
%res = select i1 %cmp, i16 %x, i16 %y
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}max_i16:
|
||||
; GCN: v_max_i16_e{{32|64}} [[OP:v[0-9]+]],
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX10: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
|
||||
define i16 @max_i16(i16 %x, i16 %y) {
|
||||
%cmp = icmp sge i16 %x, %y
|
||||
%res = select i1 %cmp, i16 %x, i16 %y
|
||||
ret i16 %res
|
||||
}
|
|
@ -1,7 +1,8 @@
|
|||
// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s
|
||||
|
||||
s_mov_b32 [ttmp5], [ttmp3]
|
||||
// SICI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x03,0xf5,0xbe]
|
||||
|
@ -110,3 +111,11 @@ flat_load_dwordx4 v[8/2+4:11/2+6], v[2:3]
|
|||
|
||||
flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3]
|
||||
// VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08]
|
||||
|
||||
v_mul_f32 v0, null, v2
|
||||
// NOSICIVI: error:
|
||||
// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10]
|
||||
|
||||
s_add_u32 null, null, null
|
||||
// NOSICIVI: error:
|
||||
// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80]
|
||||
|
|
Loading…
Reference in New Issue