forked from OSchip/llvm-project
X86: Match the SSE/AVX min/max vector ops using a custom node instead of intrinsics
This is very mechanical, no functionality change. Preparation for PR14667. llvm-svn: 170898
This commit is contained in:
parent
e30a84f08f
commit
4669d18893
|
@ -10154,6 +10154,40 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||
Op.getOperand(1), Op.getOperand(2));
|
||||
}
|
||||
|
||||
// SSE2/SSE41/AVX2 integer max/min intrinsics.
|
||||
case Intrinsic::x86_sse2_pmaxu_b:
|
||||
case Intrinsic::x86_sse41_pmaxuw:
|
||||
case Intrinsic::x86_sse41_pmaxud:
|
||||
case Intrinsic::x86_avx2_pmaxu_b:
|
||||
case Intrinsic::x86_avx2_pmaxu_w:
|
||||
case Intrinsic::x86_avx2_pmaxu_d:
|
||||
return DAG.getNode(X86ISD::UMAX, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse2_pminu_b:
|
||||
case Intrinsic::x86_sse41_pminuw:
|
||||
case Intrinsic::x86_sse41_pminud:
|
||||
case Intrinsic::x86_avx2_pminu_b:
|
||||
case Intrinsic::x86_avx2_pminu_w:
|
||||
case Intrinsic::x86_avx2_pminu_d:
|
||||
return DAG.getNode(X86ISD::UMIN, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse41_pmaxsb:
|
||||
case Intrinsic::x86_sse2_pmaxs_w:
|
||||
case Intrinsic::x86_sse41_pmaxsd:
|
||||
case Intrinsic::x86_avx2_pmaxs_b:
|
||||
case Intrinsic::x86_avx2_pmaxs_w:
|
||||
case Intrinsic::x86_avx2_pmaxs_d:
|
||||
return DAG.getNode(X86ISD::SMAX, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse41_pminsb:
|
||||
case Intrinsic::x86_sse2_pmins_w:
|
||||
case Intrinsic::x86_sse41_pminsd:
|
||||
case Intrinsic::x86_avx2_pmins_b:
|
||||
case Intrinsic::x86_avx2_pmins_w:
|
||||
case Intrinsic::x86_avx2_pmins_d:
|
||||
return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
|
||||
// AVX2 variable shift intrinsics
|
||||
case Intrinsic::x86_avx2_psllv_d:
|
||||
case Intrinsic::x86_avx2_psllv_q:
|
||||
|
@ -11974,6 +12008,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::HSUB: return "X86ISD::HSUB";
|
||||
case X86ISD::FHADD: return "X86ISD::FHADD";
|
||||
case X86ISD::FHSUB: return "X86ISD::FHSUB";
|
||||
case X86ISD::UMAX: return "X86ISD::UMAX";
|
||||
case X86ISD::UMIN: return "X86ISD::UMIN";
|
||||
case X86ISD::SMAX: return "X86ISD::SMAX";
|
||||
case X86ISD::SMIN: return "X86ISD::SMIN";
|
||||
case X86ISD::FMAX: return "X86ISD::FMAX";
|
||||
case X86ISD::FMIN: return "X86ISD::FMIN";
|
||||
case X86ISD::FMAXC: return "X86ISD::FMAXC";
|
||||
|
|
|
@ -197,6 +197,12 @@ namespace llvm {
|
|||
/// FHSUB - Floating point horizontal sub.
|
||||
FHSUB,
|
||||
|
||||
/// UMAX, UMIN - Unsigned integer max and min.
|
||||
UMAX, UMIN,
|
||||
|
||||
/// SMAX, SMIN - Signed integer max and min.
|
||||
SMAX, SMIN,
|
||||
|
||||
/// FMAX, FMIN - Floating point max and min.
|
||||
///
|
||||
FMAX, FMIN,
|
||||
|
|
|
@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
|
|||
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
|
||||
SDTCisFP<1>, SDTCisVT<3, i8>]>;
|
||||
|
||||
def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
|
||||
def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
|
||||
def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
|
||||
def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
|
||||
|
||||
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
|
||||
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
|
||||
|
||||
|
|
|
@ -775,6 +775,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
|
||||
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
|
||||
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
|
||||
{ X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
|
||||
{ X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
|
||||
{ X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
|
||||
{ X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
|
||||
{ X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
|
||||
{ X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
|
||||
{ X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
|
||||
{ X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
|
||||
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
|
||||
{ X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
|
||||
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
|
||||
|
@ -951,6 +959,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||
{ X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINSBrr, X86::VPMINSBrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINSDrr, X86::VPMINSDrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINUDrr, X86::VPMINUDrm, TB_ALIGN_16 },
|
||||
{ X86::VPMINUWrr, X86::VPMINUWrm, TB_ALIGN_16 },
|
||||
{ X86::VPMAXSBrr, X86::VPMAXSBrm, TB_ALIGN_16 },
|
||||
{ X86::VPMAXSDrr, X86::VPMAXSDrm, TB_ALIGN_16 },
|
||||
{ X86::VPMAXUDrr, X86::VPMAXUDrm, TB_ALIGN_16 },
|
||||
{ X86::VPMAXUWrr, X86::VPMAXUWrm, TB_ALIGN_16 },
|
||||
{ X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
|
||||
{ X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
|
||||
{ X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
|
||||
|
@ -1092,6 +1108,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINSBYrr, X86::VPMINSBYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINSDYrr, X86::VPMINSDYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINUDYrr, X86::VPMINUDYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMINUWYrr, X86::VPMINUWYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMAXSBYrr, X86::VPMAXSBYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMAXSDYrr, X86::VPMAXSDYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMAXUDYrr, X86::VPMAXUDYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMAXUWYrr, X86::VPMAXUWYrm, TB_ALIGN_32 },
|
||||
{ X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
|
||||
{ X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
|
||||
{ X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
|
||||
|
|
|
@ -3733,6 +3733,14 @@ defm VPSUBUSW : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v8i16, VR128,
|
|||
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
|
||||
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
|
||||
VEX_4V;
|
||||
defm VPMINUB : PDI_binop_rm<0xDA, "vpminub", X86umin, v16i8, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMINSW : PDI_binop_rm<0xEA, "vpminsw", X86smin, v8i16, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMAXUB : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v16i8, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMAXSW : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v8i16, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
|
||||
// Intrinsic forms
|
||||
defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
|
||||
|
@ -3768,18 +3776,6 @@ defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
|
|||
defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
|
||||
|
@ -3813,6 +3809,18 @@ defm VPSUBUSWY : PDI_binop_rm<0xD9, "vpsubusw", X86subus, v16i16, VR256,
|
|||
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINUBY : PDI_binop_rm<0xDA, "vpminub", X86umin, v32i8,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINSWY : PDI_binop_rm<0xEA, "vpminsw", X86smin, v16i16,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXUBY : PDI_binop_rm<0xDE, "vpmaxub", X86umax, v32i8,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXSWY : PDI_binop_rm<0xEE, "vpmaxsw", X86smax, v16i16,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
|
||||
// Intrinsic forms
|
||||
defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
|
||||
|
@ -3848,18 +3856,6 @@ defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
|
|||
defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
|
||||
VR256, memopv4i64, i256mem,
|
||||
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
|
||||
|
@ -3890,6 +3886,14 @@ defm PSUBUSW : PDI_binop_rm<0xD9, "psubusw", X86subus, v8i16, VR128, memopv2i64,
|
|||
i128mem, SSE_INTALU_ITINS_P>;
|
||||
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
|
||||
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
|
||||
defm PMINUB : PDI_binop_rm<0xDA, "pminub", X86umin, v16i8, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMINSW : PDI_binop_rm<0xEA, "pminsw", X86smin, v8i16, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMAXUB : PDI_binop_rm<0xDE, "pmaxub", X86umax, v16i8, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMAXSW : PDI_binop_rm<0xEE, "pmaxsw", X86smax, v8i16, VR128, memopv2i64,
|
||||
i128mem, SSE_INTALU_ITINS_P, 1>;
|
||||
|
||||
// Intrinsic forms
|
||||
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
|
||||
|
@ -3925,18 +3929,6 @@ defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
|
|||
defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
|
||||
VR128, memopv2i64, i128mem,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
|
@ -6701,67 +6693,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
|
|||
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
|
||||
0>, VEX_4V;
|
||||
defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
|
||||
0>, VEX_4V;
|
||||
defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
|
||||
0>, VEX_4V;
|
||||
defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
|
||||
0>, VEX_4V;
|
||||
defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
|
||||
0>, VEX_4V;
|
||||
defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
|
||||
0>, VEX_4V;
|
||||
defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
|
||||
0>, VEX_4V;
|
||||
defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
|
||||
0>, VEX_4V;
|
||||
defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
|
||||
0>, VEX_4V;
|
||||
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
|
||||
0>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
|
||||
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
|
||||
defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
|
||||
int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
|
||||
defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
|
||||
int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
|
||||
defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
|
||||
int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
|
||||
defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
|
||||
int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
|
||||
defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
|
||||
int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
|
||||
defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
|
||||
int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
|
||||
defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
|
||||
int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
|
||||
defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
|
||||
int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
|
||||
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
|
||||
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in
|
||||
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
|
||||
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
|
||||
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
|
||||
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
|
||||
defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
|
||||
defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
|
||||
defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
|
||||
defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
|
||||
}
|
||||
|
||||
/// SS48I_binop_rm - Simple SSE41 binary operator.
|
||||
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -6784,6 +6715,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(bitconvert (memop_frag addr:$src2)))))]>, OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
|
||||
0>, VEX_4V;
|
||||
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
|
||||
0>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
|
||||
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
|
||||
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
|
||||
memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
|
||||
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
|
||||
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in
|
||||
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
|
||||
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
|
||||
memopv2i64, i128mem>;
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
|
||||
memopv2i64, i128mem, 0>, VEX_4V;
|
||||
|
|
Loading…
Reference in New Issue